diff schema/geonames-import/import-geonames.sh @ 1109:74a75a5ce770

Added geonames data and extended search for villages/cities. The free data from geonames (https://download.geonames.org/) is imported into the gemma database (for the relevant states) and used to search for cities and villages. This data might be replaced by data from the RIS-index later on.
author Sascha Wilde <wilde@intevation.de>
date Mon, 05 Nov 2018 13:07:16 +0100
parents
children ca7821e1f720
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/schema/geonames-import/import-geonames.sh	Mon Nov 05 13:07:16 2018 +0100
@@ -0,0 +1,147 @@
+#!/bin/bash
+
+# This is Free Software under GNU Affero General Public License v >= 3.0
+# without warranty, see README.md and license for details.
+
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# License-Filename: LICENSES/AGPL-3.0.txt
+
+# Copyright (C) 2018 by via donau
+#   – Österreichische Wasserstraßen-Gesellschaft mbH
+# Software engineering by Intevation GmbH
+
+# Author(s):
+#  * Sascha Wilde <wilde@intevation.de>
+
+
+# Name of this script (used in the help text and getopt diagnostics) and
+# the absolute directory it lives in.  "$0" is quoted so that a script
+# path containing whitespace or glob characters is handled correctly.
+ME=$(basename -- "$0")
+MYHOME=$(realpath -- "$(dirname -- "$0")")
+# Country codes whose geonames dumps (<CC>.zip) are fetched and imported.
+CCs="AT BG HR HU RO SK"
+
+# Defaults (may be overridden by command line options):
+datadir=$(realpath "$MYHOME/data")
+db=gemma
+port=5432
+drop=0
+
+# Print the command line help text to stdout.
+# Globals read: ME (script name), datadir (its current value is shown as
+# the default of --datadir).
+usage()
+{
+  cat <<EOF
+$ME [OPTION]...
+
+Import geonames data (used for extended search features) into gemma database.
+
+If data was downloaded previously, it will be reused.  To force update
+of data, first delete the existing data directory or use a fresh one.
+
+Options:
+  -d, --db=NAME       import into the database NAME.  Default: "gemma"
+  -p, --port=PORT     connect to the postgresql cluster at PORT.
+                      Default is the postgresql standard port 5432
+  -D, --datadir=PATH  download data to PATH.  Default: "$datadir"
+      --drop          drop geonames data
+      --help          display this help and exit
+EOF
+}
+
+# Report the message passed as $1 on stderr, then terminate the whole
+# script with exit status 23.
+fatal()
+{
+  { echo "$1" ; } 1>&2
+  exit 23
+}
+
+# Download the geonames dump archive <CC>.zip for every country code in
+# CCs into $datadir, creating that directory first if necessary.
+# Archives that are already present are kept and not fetched again.
+# The working directory is left unchanged.
+# Globals read: datadir, CCs.
+# Terminates the script via fatal() if the directory cannot be created
+# or a download fails.
+dl() {
+  local cc
+  [ -d "$datadir" ] || mkdir -p -- "$datadir" \
+    || fatal "Cannot create data directory: $datadir"
+  for cc in $CCs ; do
+    if [ -f "$datadir/${cc}.zip" ] ; then
+      echo "Keeping existing ${cc}.zip ..."
+    else
+      echo "Fetching ${cc}.zip ..."
+      # -f makes curl fail on HTTP errors instead of saving the error page.
+      # The archive is fetched under a temporary name and only renamed on
+      # success, so a failed or interrupted transfer never leaves a broken
+      # ${cc}.zip behind that a later run would silently reuse.
+      if curl -f -o "$datadir/${cc}.zip.part" \
+              "https://download.geonames.org/export/dump/${cc}.zip" ; then
+        mv -- "$datadir/${cc}.zip.part" "$datadir/${cc}.zip" \
+          || fatal "Cannot store $datadir/${cc}.zip"
+      else
+        rm -f -- "$datadir/${cc}.zip.part"
+        fatal "Download failed: ${cc}.zip"
+      fi
+    fi
+  done
+}
+
+# Extract <CC>.txt from the archive <CC>.zip in $datadir for every
+# country code in CCs.  Files that were extracted earlier are reused.
+# The working directory is left unchanged.
+# Globals read: datadir, CCs.
+# Terminates the script via fatal() if the data directory or an archive
+# is missing, or if an extraction fails.
+unpack()
+{
+  local cc
+  [ -d "$datadir" ] || fatal "$datadir is missing!"
+  for cc in $CCs ; do
+    [ -f "$datadir/${cc}.zip" ] || fatal "Missing archive: ${cc}.zip"
+    if [ -f "$datadir/${cc}.txt" ] ; then
+      echo "Reusing existing ${cc}.txt"
+    else
+      echo "Extracting ${cc}.zip ..."
+      # -d extracts into $datadir without having to cd there.  A partly
+      # written file is removed on failure so it is not reused later.
+      if ! unzip "$datadir/${cc}.zip" "${cc}.txt" -d "$datadir" ; then
+        rm -f -- "$datadir/${cc}.txt"
+        fatal "Extraction failed: ${cc}.zip"
+      fi
+    fi
+  done
+}
+
+# Remove the geonames data by truncating the table waterway.geonames in
+# database $db on the cluster listening at $port.
+# Returns the exit status of psql.
+drop_data()
+{
+  local sql="TRUNCATE TABLE waterway.geonames;"
+  echo "Dropping geonames data..."
+  psql --quiet --port="$port" --dbname="$db" --command="$sql"
+}
+
+# Import the extracted file <CC>.txt of every country code in CCs from
+# $datadir by running import-geonames.sql (located next to this script)
+# through psql, passing the file name as the psql variable "filename".
+# Globals read: CCs, datadir, db, port, MYHOME.
+# Terminates the script via fatal() as soon as one import fails, instead
+# of silently continuing with the next country.
+import_data()
+{
+  local cc fqfn
+  for cc in $CCs ; do
+    fqfn="${datadir}/${cc}.txt"
+    echo "Importing $fqfn into database..."
+    # ON_ERROR_STOP makes psql stop at the first failing statement and
+    # return a non-zero status, which is checked here.
+    psql -q -t -v ON_ERROR_STOP=1 -p "$port" -d "$db" \
+         -v filename="$fqfn" \
+         -f "$MYHOME/import-geonames.sql" \
+      || fatal "Import failed: $fqfn"
+  done
+}
+
+# Parse options:
+
+# Let getopt(1) normalise the command line; on a usage error it prints
+# its own diagnostic (prefixed with $ME), then the help text is shown.
+OPTS=$(getopt \
+      -l help,db:,port:,datadir:,drop \
+      -o D:d:p: -n "$ME" -- "$@") || { usage ; exit 1 ; }
+
+# getopt emits a shell-quoted word list; eval re-splits it correctly.
+eval set -- "$OPTS"
+
+while true ; do
+  case "$1" in
+    --db|-d)
+      db="$2"
+      shift 2
+      ;;
+    --port|-p)
+      port="$2"
+      shift 2
+      ;;
+    --datadir|-D)
+      # Make the path absolute (as is done for the default), so that it
+      # stays valid even if the working directory changes later on.
+      datadir=$(realpath -- "$2") || fatal "Invalid data directory: $2"
+      shift 2
+      ;;
+    --drop)
+      drop=1
+      shift 1
+      ;;
+    --help)
+      usage
+      exit 0
+      ;;
+    --)
+      shift
+      break
+      ;;
+    *)
+      # Guard against an endless loop should an unhandled word show up.
+      fatal "Internal error: unexpected option '$1'"
+      ;;
+  esac
+done
+
+
+# Main ------------------------------------------------------------
+
+# With --drop only the existing geonames data is removed.  Otherwise the
+# dumps are fetched and unpacked, the old data is dropped and the fresh
+# data is imported.
+if ! [ "$drop" -eq 0 ] ; then
+  drop_data
+else
+  dl
+  unpack
+  drop_data
+  import_data
+fi