changeset 1608:427f9010b4a9

WFS download: Started with GET downloader (paged and unpaged).
author Sascha L. Teichmann <sascha.teichmann@intevation.de>
date Mon, 17 Dec 2018 18:27:57 +0100
parents 38f91897ca69
children d3c9bdc8644f
files cmd/wfs/dump.go cmd/wfs/main.go pkg/wfs/capabilities.go pkg/wfs/download.go
diffstat 4 files changed, 481 insertions(+), 124 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cmd/wfs/dump.go	Mon Dec 17 18:27:57 2018 +0100
@@ -0,0 +1,136 @@
+// This is Free Software under GNU Affero General Public License v >= 3.0
+// without warranty, see README.md and license for details.
+//
+// SPDX-License-Identifier: AGPL-3.0-or-later
+// License-Filename: LICENSES/AGPL-3.0.txt
+//
+// Copyright (C) 2018 by via donau
+//   – Österreichische Wasserstraßen-Gesellschaft mbH
+// Software engineering by Intevation GmbH
+//
+// Author(s):
+//  * Sascha L. Teichmann <sascha.teichmann@intevation.de>
+
+package main
+
+import (
+	"fmt"
+
+	"gemma.intevation.de/gemma/pkg/wfs"
+)
+
+// dump writes a human readable summary of the capabilities document caps
+// to standard output. It prints three sections: the service identification,
+// the operations meta data (operations with their GET/POST end points,
+// parameters and constraints) and the list of feature types.
+func dump(caps *wfs.Capabilities) {
+	// abbreviate shortens texts longer than 40 bytes and marks the cut
+	// with a trailing "...".
+	abbreviate := func(s string) string {
+		if len(s) > 40 {
+			return fmt.Sprintf("%.40s...", s)
+		}
+		return s
+	}
+
+	fmt.Println("service identification")
+	fmt.Println("----------------------")
+	fmt.Printf("title: %s\n", caps.ServiceIdentification.Title)
+	fmt.Printf("abstract: %s\n", abbreviate(caps.ServiceIdentification.Abstract))
+	if len(caps.ServiceIdentification.Keywords.Keywords) > 0 {
+		fmt.Println("keywords:")
+		for _, kw := range caps.ServiceIdentification.Keywords.Keywords {
+			fmt.Printf("\t%s\n", kw.Value)
+		}
+	}
+	fmt.Printf("type: %s\n", caps.ServiceIdentification.ServiceType)
+	fmt.Printf("version: %s\n", caps.ServiceIdentification.ServiceTypeVersion)
+	fmt.Println()
+	fmt.Println("operations meta data")
+	fmt.Println("--------------------")
+	if len(caps.OperationsMetadata.Operations) > 0 {
+		fmt.Println("operations:")
+		for _, operation := range caps.OperationsMetadata.Operations {
+			fmt.Printf("\t%s\n", operation.Name)
+			if operation.DCP.HTTP.Get != nil {
+				fmt.Printf("\t\tGet: %s\n", operation.DCP.HTTP.Get.HRef)
+			}
+			if operation.DCP.HTTP.Post != nil {
+				fmt.Printf("\t\tPost: %s\n", operation.DCP.HTTP.Post.HRef)
+			}
+
+			if len(operation.Parameters) > 0 {
+				fmt.Println("\t\tparameters:")
+				for _, p := range operation.Parameters {
+					fmt.Printf("\t\t\tparameter: %s\n", p.Name)
+					for _, av := range p.AllowedValues.Values {
+						fmt.Printf("\t\t\t\t%s\n", av.Value)
+					}
+				}
+			}
+			if len(operation.Constraints) > 0 {
+				fmt.Println("\t\tconstraints:")
+				for _, c := range operation.Constraints {
+					fmt.Printf("\t\t\tname: %s\n", c.Name)
+					if c.DefaultValue != nil {
+						fmt.Printf("\t\t\t\tdefault: %s\n", c.DefaultValue.Value)
+					}
+					if len(c.AllowedValues.Values) > 0 {
+						fmt.Println("\t\t\tallowed values:")
+						for _, av := range c.AllowedValues.Values {
+							// One value per line, like every other list
+							// printed by this function.
+							fmt.Printf("\t\t\t\t%s\n", av.Value)
+						}
+					}
+				}
+			}
+		}
+	}
+	if len(caps.OperationsMetadata.Constraints) > 0 {
+		fmt.Println("constraints:")
+		for _, c := range caps.OperationsMetadata.Constraints {
+			fmt.Printf("\tname: %s\n", c.Name)
+			if c.DefaultValue != nil {
+				fmt.Printf("\t\tdefault: %s\n", c.DefaultValue.Value)
+			}
+			if len(c.AllowedValues.Values) > 0 {
+				fmt.Println("\tallowed values:")
+				for _, av := range c.AllowedValues.Values {
+					fmt.Printf("\t\t%s\n", av.Value)
+				}
+			}
+		}
+	}
+	fmt.Println()
+	fmt.Println("feature type list")
+	fmt.Println("------------------")
+	if len(caps.FeatureTypeList.FeatureTypes) > 0 {
+		fmt.Println("features:")
+		for _, ft := range caps.FeatureTypeList.FeatureTypes {
+			fmt.Printf("\tname: %s\n", ft.Name)
+			fmt.Printf("\ttitle: %s\n", ft.Title)
+			fmt.Printf("\tabstract: %s\n", abbreviate(ft.Abstract))
+			fmt.Printf("\tdefault CRS: %s\n", ft.DefaultCRS)
+			if len(ft.OtherCRSs) > 0 {
+				fmt.Println("\tother CRSs:")
+				for _, crs := range ft.OtherCRSs {
+					fmt.Printf("\t\t%s\n", crs)
+				}
+			}
+			if ft.WGS84BoundingBox != nil {
+				fmt.Printf("\tWGS84 bounding box: (%s) - (%s)\n",
+					ft.WGS84BoundingBox.LowerCorner, ft.WGS84BoundingBox.UpperCorner)
+			}
+			if len(ft.Keywords.Keywords) > 0 {
+				fmt.Println("\tkeywords:")
+				for _, kw := range ft.Keywords.Keywords {
+					fmt.Printf("\t\t%s\n", kw.Value)
+				}
+			}
+			if len(ft.Namespaces) > 0 {
+				fmt.Println("\tnamespaces:")
+				for _, ns := range ft.Namespaces {
+					fmt.Printf("\t\t%s:%s\n", ns.Space, ns.Local)
+				}
+			}
+		}
+	}
+}
--- a/cmd/wfs/main.go	Mon Dec 17 16:48:09 2018 +0100
+++ b/cmd/wfs/main.go	Mon Dec 17 18:27:57 2018 +0100
@@ -14,145 +14,30 @@
 package main
 
 import (
-	"bufio"
 	"flag"
-	"fmt"
 	"log"
-	"os"
 
 	"gemma.intevation.de/gemma/pkg/wfs"
 )
 
-func loadCapabilities(fname string) (*wfs.Capabilities, error) {
-	f, err := os.Open(fname)
-	if err != nil {
-		return nil, err
-	}
-	defer f.Close()
-	return wfs.ParseCapabilities(bufio.NewReader(f))
-}
-
 func main() {
+	var (
+		dumpCaps    = flag.Bool("dump-caps", false, "Dump capabilities document")
+		featureType = flag.String("features", "ws-wamos:ienc_wtwaxs", "feature to get")
+	)
 	flag.Parse()
 
 	for _, arg := range flag.Args() {
-		caps, err := loadCapabilities(arg)
+		caps, err := wfs.GetCapabilities(arg)
 		if err != nil {
 			log.Fatalf("error: %v\n", err)
 		}
-
-		fmt.Println("service identification")
-		fmt.Println("----------------------")
-		fmt.Printf("title: %s\n", caps.ServiceIdentification.Title)
-		var abstract string
-		if len(caps.ServiceIdentification.Abstract) > 40 {
-			abstract = fmt.Sprintf("%.40s...", caps.ServiceIdentification.Abstract)
-		} else {
-			abstract = caps.ServiceIdentification.Abstract
+		if *dumpCaps {
+			dump(caps)
 		}
-		fmt.Printf("abstract: %s\n", abstract)
-		if len(caps.ServiceIdentification.Keywords.Keywords) > 0 {
-			fmt.Println("keywords:")
-			for _, kw := range caps.ServiceIdentification.Keywords.Keywords {
-				fmt.Printf("\t%s\n", kw.Value)
-			}
-		}
-		fmt.Printf("type: %s\n", caps.ServiceIdentification.ServiceType)
-		fmt.Printf("version: %s\n", caps.ServiceIdentification.ServiceTypeVersion)
-		fmt.Println()
-		fmt.Println("operations meta data")
-		fmt.Println("--------------------")
-		if len(caps.OperationsMetadata.Operations) > 0 {
-			fmt.Println("operations:")
-			for _, operation := range caps.OperationsMetadata.Operations {
-				fmt.Printf("\t%s\n", operation.Name)
-				if operation.DCP.HTTP.Get != nil {
-					fmt.Printf("\t\tGet: %s\n", operation.DCP.HTTP.Get.HRef)
-				}
-				if operation.DCP.HTTP.Post != nil {
-					fmt.Printf("\t\tPost: %s\n", operation.DCP.HTTP.Post.HRef)
-				}
 
-				if len(operation.Parameters) > 0 {
-					fmt.Println("\t\tparameters:")
-					for _, p := range operation.Parameters {
-						fmt.Printf("\t\t\tparameter: %s\n", p.Name)
-						for _, av := range p.AllowedValues.Values {
-							fmt.Printf("\t\t\t\t%s\n", av.Value)
-						}
-					}
-				}
-				if len(operation.Constraints) > 0 {
-					fmt.Println("\t\tconstraints:")
-					for _, c := range operation.Constraints {
-						fmt.Printf("\t\t\tname: %s\n", c.Name)
-						if c.DefaultValue != nil {
-							fmt.Printf("\t\t\t\tdefault: %s\n", c.DefaultValue.Value)
-						}
-						if len(c.AllowedValues.Values) > 0 {
-							fmt.Println("\t\t\tallowed values:")
-							for _, av := range c.AllowedValues.Values {
-								fmt.Printf("\t\t\t\t%s", av.Value)
-							}
-						}
-					}
-				}
-			}
-		}
-		if len(caps.OperationsMetadata.Constraints) > 0 {
-			fmt.Println("constraints:")
-			for _, c := range caps.OperationsMetadata.Constraints {
-				fmt.Printf("\tname: %s\n", c.Name)
-				if c.DefaultValue != nil {
-					fmt.Printf("\t\tdefault: %s\n", c.DefaultValue.Value)
-				}
-				if len(c.AllowedValues.Values) > 0 {
-					fmt.Println("\tallowed values:")
-					for _, av := range c.AllowedValues.Values {
-						fmt.Printf("\t\t%s\n", av.Value)
-					}
-				}
-			}
-		}
-		fmt.Println()
-		fmt.Println("feature type list")
-		fmt.Println("------------------")
-		if len(caps.FeatureTypeList.FeatureTypes) > 0 {
-			fmt.Println("features:")
-			for _, ft := range caps.FeatureTypeList.FeatureTypes {
-				fmt.Printf("\tname: %s\n", ft.Name)
-				fmt.Printf("\ttitle: %s\n", ft.Title)
-				var abstract string
-				if len(ft.Abstract) > 40 {
-					abstract = fmt.Sprintf("%.40s...", ft.Abstract)
-				} else {
-					abstract = ft.Abstract
-				}
-				fmt.Printf("\tabstract: %s\n", abstract)
-				fmt.Printf("\tdefault CRS: %s\n", ft.DefaultCRS)
-				if len(ft.OtherCRSs) > 0 {
-					fmt.Println("\tother CRSs:")
-					for _, crs := range ft.OtherCRSs {
-						fmt.Printf("\t\t%s\n", crs)
-					}
-				}
-				if ft.WGS84BoundingBox != nil {
-					fmt.Printf("\tWGS84 bounding box: (%s) - (%s)\n",
-						ft.WGS84BoundingBox.LowerCorner, ft.WGS84BoundingBox.UpperCorner)
-				}
-				if len(ft.Keywords.Keywords) > 0 {
-					fmt.Println("\tkeywords:")
-					for _, kw := range ft.Keywords.Keywords {
-						fmt.Printf("\t\t%s\n", kw.Value)
-					}
-				}
-				if len(ft.Namespaces) > 0 {
-					fmt.Println("\tnamespaces:")
-					for _, ns := range ft.Namespaces {
-						fmt.Printf("\t\t%s:%s\n", ns.Space, ns.Local)
-					}
-				}
-			}
+		if err := wfs.GetFeaturesGET(caps, *featureType); err != nil {
+			log.Fatalf("error: %v\n", err)
 		}
 	}
 }
--- a/pkg/wfs/capabilities.go	Mon Dec 17 16:48:09 2018 +0100
+++ b/pkg/wfs/capabilities.go	Mon Dec 17 18:27:57 2018 +0100
@@ -16,6 +16,8 @@
 import (
 	"encoding/xml"
 	"io"
+	"regexp"
+	"strconv"
 
 	"golang.org/x/net/html/charset"
 )
@@ -157,11 +159,147 @@
 type Capabilities struct {
 	XMLName xml.Name `xml:"http://www.opengis.net/wfs/2.0 WFS_Capabilities"`
 
+	// BaseURL is not part of the XML document (tag "-"). GetCapabilities
+	// stores the URL it requested the document from here and
+	// GetFeaturesGET uses it to resolve relative operation URLs.
+	BaseURL string `xml:"-"`
+
 	ServiceIdentification ServiceIdentification
 	OperationsMetadata    OperationsMetadata
 	FeatureTypeList       FeatureTypeList
 }
 
+// FindOperation looks up the operation called name in the operations
+// meta data. It returns nil if no operation has that name.
+func (c *Capabilities) FindOperation(name string) *Operation {
+	ops := c.OperationsMetadata.Operations
+	for i := range ops {
+		if candidate := ops[i]; candidate.Name == name {
+			return candidate
+		}
+	}
+	return nil
+}
+
+// SupportsHits reports whether the operation lists "hits" among the
+// allowed values of a parameter named "resultType".
+func (o *Operation) SupportsHits() bool {
+	for _, param := range o.Parameters {
+		if param.Name != "resultType" {
+			continue
+		}
+		for _, allowed := range param.AllowedValues.Values {
+			if allowed.Value == "hits" {
+				return true
+			}
+		}
+	}
+	return false
+}
+
+// FeaturesPerPage extracts the page size from the operation's
+// "CountDefault" constraints. For each such constraint the default value
+// is tried first, then the allowed values in order; the first one that
+// parses as an integer is returned together with true. If none is found
+// the result is 0 and false.
+func (o *Operation) FeaturesPerPage() (int, bool) {
+	for _, constraint := range o.Constraints {
+		if constraint.Name != "CountDefault" {
+			continue
+		}
+		if def := constraint.DefaultValue; def != nil {
+			if n, err := strconv.Atoi(def.Value); err == nil {
+				return n, true
+			}
+		}
+		for _, allowed := range constraint.AllowedValues.Values {
+			n, err := strconv.Atoi(allowed.Value)
+			if err == nil {
+				return n, true
+			}
+		}
+	}
+	return 0, false
+}
+
+// FindFeatureType looks up the feature type called name in the feature
+// type list. It returns nil if no feature type has that name.
+func (c *Capabilities) FindFeatureType(name string) *FeatureType {
+	types := c.FeatureTypeList.FeatureTypes
+	for i := range types {
+		if candidate := types[i]; candidate.Name == name {
+			return candidate
+		}
+	}
+	return nil
+}
+
+// FindParameter looks up the parameter called name among the parameters
+// of the operation. It returns nil if no parameter has that name.
+func (o *Operation) FindParameter(name string) *Parameter {
+	for _, p := range o.Parameters {
+		if p.Name == name {
+			return p
+		}
+	}
+	return nil
+}
+
+// WFS2_0_0 is the version string "2.0.0". GetFeaturesGET passes it to
+// HighestWFSVersion as the default and switches between the "count" and
+// "maxFeatures" request parameters by comparing against it.
+const WFS2_0_0 = "2.0.0"
+
+// versionRe matches the first dotted triple of decimal numbers in a
+// string and captures its three components. It is not anchored, so
+// surrounding text is ignored.
+var versionRe = regexp.MustCompile(`(\d+)\.(\d+)\.(\d+)`)
+
+// versionIsLess reports whether version a is lower than version b.
+// From both strings the first number triple found by versionRe is taken
+// and the components are compared numerically from left to right.
+// If the versions are equal, or one of the strings holds no such triple,
+// the result is false.
+func versionIsLess(a, b string) bool {
+	lhs := versionRe.FindStringSubmatch(a)
+	rhs := versionRe.FindStringSubmatch(b)
+
+	// Only compare the components both sides have. Index 0 is the whole
+	// match, so the components start at index 1.
+	limit := len(lhs)
+	if len(rhs) < limit {
+		limit = len(rhs)
+	}
+
+	for i := 1; i < limit; i++ {
+		x, _ := strconv.Atoi(lhs[i])
+		y, _ := strconv.Atoi(rhs[i])
+		if x != y {
+			return x < y
+		}
+	}
+	return false
+}
+
+// maxVersion returns the higher of the two version strings a and b, using
+// the ordering of versionIsLess. If both compare equal, or one of them
+// holds no parsable version number, a is returned.
+func maxVersion(a, b string) string {
+	if versionIsLess(a, b) {
+		return b
+	}
+	return a
+}
+
+// HighestWFSVersion returns the highest version listed as an allowed value
+// of the "AcceptVersions" parameter of the "GetCapabilities" operation.
+// If the operation or the parameter is missing, or the parameter has no
+// allowed values, def is returned.
+func (c *Capabilities) HighestWFSVersion(def string) string {
+	getCaps := c.FindOperation("GetCapabilities")
+	if getCaps == nil {
+		return def
+	}
+	accept := getCaps.FindParameter("AcceptVersions")
+	if accept == nil || len(accept.AllowedValues.Values) == 0 {
+		return def
+	}
+
+	versions := accept.AllowedValues.Values
+	highest := versions[0].Value
+	for i := 1; i < len(versions); i++ {
+		highest = maxVersion(highest, versions[i].Value)
+	}
+	return highest
+}
+
 func ParseCapabilities(r io.Reader) (*Capabilities, error) {
 
 	decoder := xml.NewDecoder(r)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pkg/wfs/download.go	Mon Dec 17 18:27:57 2018 +0100
@@ -0,0 +1,198 @@
+// This is Free Software under GNU Affero General Public License v >= 3.0
+// without warranty, see README.md and license for details.
+//
+// SPDX-License-Identifier: AGPL-3.0-or-later
+// License-Filename: LICENSES/AGPL-3.0.txt
+//
+// Copyright (C) 2018 by via donau
+//   – Österreichische Wasserstraßen-Gesellschaft mbH
+// Software engineering by Intevation GmbH
+//
+// Author(s):
+//  * Sascha L. Teichmann <sascha.teichmann@intevation.de>
+
+package wfs
+
+import (
+	"bufio"
+	"encoding/xml"
+	"errors"
+	"log"
+	"net/http"
+	"net/url"
+	"strconv"
+
+	"golang.org/x/net/html/charset"
+)
+
+// Errors returned by the feature download functions of this file.
+var (
+	// ErrNoSuchFeatureType is returned by GetFeaturesGET if the requested
+	// feature type is not listed in the capabilities.
+	ErrNoSuchFeatureType      = errors.New("No such feature type")
+	// ErrGetFeatureNotSupported is returned by GetFeaturesGET if the
+	// capabilities contain no "GetFeature" operation.
+	ErrGetFeatureNotSupported = errors.New("GetFeature not supported")
+	// ErrMethodGetNotSupported is returned by GetFeaturesGET if the
+	// "GetFeature" operation has no HTTP GET end point.
+	ErrMethodGetNotSupported  = errors.New("GET not supported")
+	// ErrNoNumberMatchedFound is returned by numberFeaturesGET if the
+	// response carries no numberMatched attribute.
+	ErrNoNumberMatchedFound   = errors.New("No numberMatched attribute found")
+)
+
+// GetCapabilities fetches and parses the WFS capabilities document of the
+// service at capURL.
+//
+// The query part of capURL is replaced by a GetCapabilities request which
+// accepts the versions 2.0.0, 1.1.0 and 1.0.0. The URL actually requested
+// is stored in the BaseURL field of the result.
+//
+// An error is returned if capURL cannot be parsed, the request fails, the
+// server answers with a non-2xx status or the document cannot be parsed.
+func GetCapabilities(capURL string) (*Capabilities, error) {
+	base, err := url.Parse(capURL)
+	if err != nil {
+		return nil, err
+	}
+	v := url.Values{}
+	v.Set("SERVICE", "WFS")
+	v.Set("REQUEST", "GetCapabilities")
+	v.Set("ACCEPTVERSIONS", "2.0.0,1.1.0,1.0.0")
+	base.RawQuery = v.Encode()
+
+	baseURL := base.String()
+	resp, err := http.Get(baseURL)
+	if err != nil {
+		return nil, err
+	}
+	defer resp.Body.Close()
+
+	// Do not feed an HTTP error page to the XML parser; report the
+	// status instead.
+	if resp.StatusCode < 200 || resp.StatusCode > 299 {
+		return nil, errors.New("GetCapabilities request failed: " + resp.Status)
+	}
+
+	caps, err := ParseCapabilities(bufio.NewReader(resp.Body))
+	if err != nil {
+		return nil, err
+	}
+	caps.BaseURL = baseURL
+	return caps, nil
+}
+
+// numberFeaturesGET asks the service at u how many features of type
+// featureType exist. It sends a GetFeature request with resultType "hits"
+// for the given WFS version and reads the numberMatched attribute of the
+// root element of the response. u itself is not modified.
+//
+// An error is returned if the request fails, the server answers with a
+// non-2xx status or the response cannot be decoded.
+// ErrNoNumberMatchedFound is returned if the attribute is missing.
+func numberFeaturesGET(u *url.URL, featureType, version string) (int, error) {
+	v := url.Values{}
+	v.Set("SERVICE", "WFS")
+	v.Set("REQUEST", "GetFeature")
+	v.Set("resultType", "hits")
+	v.Set("VERSION", version)
+	v.Set("TYPENAMES", featureType)
+
+	// Work on a copy to leave the caller's URL untouched.
+	q := *u
+	q.RawQuery = v.Encode()
+
+	resp, err := http.Get(q.String())
+	if err != nil {
+		return 0, err
+	}
+	defer resp.Body.Close()
+
+	// Do not feed an HTTP error page to the XML decoder; report the
+	// status instead.
+	if resp.StatusCode < 200 || resp.StatusCode > 299 {
+		return 0, errors.New("GetFeature hits request failed: " + resp.Status)
+	}
+
+	dec := xml.NewDecoder(resp.Body)
+	dec.CharsetReader = charset.NewReaderLabel
+
+	// A pointer distinguishes a missing attribute from the value 0.
+	var result struct {
+		NumberMatched *int `xml:"numberMatched,attr"`
+	}
+
+	if err := dec.Decode(&result); err != nil {
+		return 0, err
+	}
+
+	if result.NumberMatched == nil {
+		return 0, ErrNoNumberMatchedFound
+	}
+
+	return *result.NumberMatched, nil
+}
+
+// GetFeaturesGET prepares the download of all features of the feature type
+// featureTypeName via HTTP GET from the service described by caps.
+//
+// It returns ErrNoSuchFeatureType if the feature type is not listed in
+// caps, ErrGetFeatureNotSupported if there is no "GetFeature" operation
+// and ErrMethodGetNotSupported if that operation has no GET end point.
+//
+// If the operation announces a positive page size, supports the result
+// type "hits" and the number of features could be determined, one request
+// URL per page is built. Otherwise a single unpaged request URL is used.
+// The download itself is not implemented yet; the URLs are only logged.
+func GetFeaturesGET(caps *Capabilities, featureTypeName string) error {
+
+	if caps.FindFeatureType(featureTypeName) == nil {
+		return ErrNoSuchFeatureType
+	}
+	op := caps.FindOperation("GetFeature")
+	if op == nil {
+		return ErrGetFeatureNotSupported
+	}
+
+	if op.DCP.HTTP.Get == nil {
+		return ErrMethodGetNotSupported
+	}
+
+	getU, err := url.Parse(op.DCP.HTTP.Get.HRef)
+	if err != nil {
+		return err
+	}
+	// The URL could be relative so resolve against Capabilities URL.
+	if !getU.IsAbs() {
+		base, err := url.Parse(caps.BaseURL)
+		if err != nil {
+			return err
+		}
+		// ResolveReference resolves its argument relative to its
+		// receiver, so the base URL has to be the receiver.
+		getU = base.ResolveReference(getU)
+	}
+
+	wfsVersion := caps.HighestWFSVersion(WFS2_0_0)
+
+	featuresPerPage, supportsPaging := op.FeaturesPerPage()
+
+	var numFeatures int
+
+	if supportsPaging {
+		log.Printf("Paging supported with %d feature per page.\n",
+			featuresPerPage)
+
+		switch {
+		case featuresPerPage <= 0:
+			// A non-positive page size would never advance the paging
+			// loop below; fall back to a single unpaged request.
+			supportsPaging = false
+		case !op.SupportsHits():
+			// Without "hits" the number of pages cannot be determined.
+			supportsPaging = false
+		default:
+			numFeatures, err = numberFeaturesGET(getU, featureTypeName, wfsVersion)
+			if err != nil {
+				log.Printf("error: %v\n", err)
+				supportsPaging = false
+			} else {
+				log.Printf("Number of features: %d\n", numFeatures)
+			}
+		}
+	}
+
+	var downloadURLs []string
+
+	if supportsPaging {
+		// The name of the page size parameter depends on the WFS version.
+		wfs2 := !versionIsLess(wfsVersion, WFS2_0_0)
+		pagedURL := func(ofs, count int) string {
+			v := url.Values{}
+			v.Set("SERVICE", "WFS")
+			v.Set("REQUEST", "GetFeature")
+			v.Set("VERSION", wfsVersion)
+			v.Set("startIndex", strconv.Itoa(ofs))
+			if wfs2 {
+				v.Set("count", strconv.Itoa(count))
+			} else {
+				v.Set("maxFeatures", strconv.Itoa(count))
+			}
+			v.Set("TYPENAMES", featureTypeName)
+			q := *getU
+			q.RawQuery = v.Encode()
+			return q.String()
+		}
+		if numFeatures <= featuresPerPage {
+			log.Println("All features can be fetched in one page")
+			downloadURLs = []string{pagedURL(0, numFeatures)}
+		} else {
+			log.Println("Features need to be downloaded in pages.")
+			for pos := 0; pos < numFeatures; pos += featuresPerPage {
+				// Full pages, except for a possibly shorter last one.
+				count := featuresPerPage
+				if rest := numFeatures - pos; rest < count {
+					count = rest
+				}
+				downloadURLs = append(downloadURLs, pagedURL(pos, count))
+			}
+		}
+	} else { // No paging support.
+		v := url.Values{}
+		v.Set("SERVICE", "WFS")
+		v.Set("REQUEST", "GetFeature")
+		v.Set("VERSION", wfsVersion)
+		v.Set("TYPENAMES", featureTypeName)
+		q := *getU
+		q.RawQuery = v.Encode()
+		downloadURLs = []string{q.String()}
+	}
+
+	// TODO: Implement me!
+
+	log.Printf("%v\n", downloadURLs)
+
+	return nil
+}