Mercurial > gemma
diff schema/geonames-import/import-geonames.sh @ 1109:74a75a5ce770
Added geonames data and extended search for villages/cities.
The free data from geonames https://download.geonames.org/ is imported
into the gemma database (for the relevant states) and used to search
for cities and villages.
This data might be replaced by data from the RIS-index later on..?
author | Sascha Wilde <wilde@intevation.de> |
---|---|
date | Mon, 05 Nov 2018 13:07:16 +0100 |
parents | |
children | ca7821e1f720 |
line wrap: on
line diff
#!/bin/bash

# This is Free Software under GNU Affero General Public License v >= 3.0
# without warranty, see README.md and license for details.

# SPDX-License-Identifier: AGPL-3.0-or-later
# License-Filename: LICENSES/AGPL-3.0.txt

# Copyright (C) 2018 by via donau
#   – Österreichische Wasserstraßen-Gesellschaft mbH
# Software engineering by Intevation GmbH

# Author(s):
#  * Sascha Wilde <wilde@intevation.de>

# Reconstructed from the diff hunk that added this file:
#   --- /dev/null  Thu Jan 01 00:00:00 1970 +0000
#   +++ b/schema/geonames-import/import-geonames.sh  Mon Nov 05 13:07:16 2018 +0100
#
# Purpose: download the per-country geonames dumps, unpack them and load
# them into the waterway.geonames table of the target database.

ME=$(basename -- "$0")
MYHOME=$(realpath -- "$(dirname -- "$0")")
# ISO country codes of the dumps to fetch; deliberately a space separated
# string that is word-split in the loops below.
CCs="AT BG HR HU RO SK"

# Defaults:
datadir=$(realpath "$MYHOME/data")
db=gemma
port=5432
drop=0

# Print the command line help to stdout.
usage()
{
    cat <<EOF
$ME [OPTION]...

Import geonames data (used for extended search features) into gemma database.

If data was downloaded previously, it will be reused.  To force update
of data, first delete the existing data directory or use a fresh one.

Options:
  -d, --db=NAME        use the database NAME.  Default: "gemma"
  -p, --port=PORT      connect to the postgresql cluster at PORT.
                         Default is the postgresql standard port 5432
  -D, --datadir=PATH   download data to PATH.  Default: "$datadir"
      --drop           drop geonames data
      --help           display this help and exit
EOF
}

# Print the message $1 to stderr and terminate the script with status 23.
fatal()
{
    echo >&2 "$1"
    exit 23
}

# Download the zip archive of every country in $CCs into $datadir.
# Archives that already exist are kept.  As a side effect $datadir is
# created if necessary and rewritten to its absolute form, so that later
# steps do not depend on the current working directory.
dl()
{
    local cc archive resolved

    mkdir -p -- "$datadir" || fatal "Cannot create data directory: $datadir"
    resolved=$(realpath -- "$datadir") \
        || fatal "Cannot resolve data directory: $datadir"
    datadir=$resolved

    for cc in $CCs ; do
        archive="${datadir}/${cc}.zip"
        if [ -f "$archive" ] ; then
            echo "Keeping existing ${cc}.zip ..."
        else
            echo "Fetching ${cc}.zip ..."
            # Download to a temporary name and rename only on success:
            # otherwise an HTTP error page or a truncated transfer would
            # be stored as ${cc}.zip and "kept" on every following run.
            if curl -f -o "${archive}.part" \
                    "https://download.geonames.org/export/dump/${cc}.zip"
            then
                mv -- "${archive}.part" "$archive" \
                    || fatal "Cannot store archive: $archive"
            else
                rm -f -- "${archive}.part"
                fatal "Download failed: ${cc}.zip"
            fi
        fi
    done
}

# Extract CC.txt from CC.zip inside $datadir for every country in $CCs.
# Existing text files are reused; a missing archive is fatal.
unpack()
{
    local cc

    [ -d "$datadir" ] || fatal "$datadir is missing!"
    for cc in $CCs ; do
        if [ ! -f "${datadir}/${cc}.zip" ] ; then
            fatal "Missing archive: ${cc}.zip"
        fi
        if [ -f "${datadir}/${cc}.txt" ] ; then
            echo "Reusing existing ${cc}.txt"
        else
            echo "Extracting ${cc}.zip ..."
            if ! unzip "${datadir}/${cc}.zip" "${cc}.txt" -d "$datadir" ; then
                # Do not leave a partial file behind that would be
                # "reused" by the next run.
                rm -f -- "${datadir}/${cc}.txt"
                fatal "Failed to extract ${cc}.txt from ${cc}.zip"
            fi
        fi
    done
}

# Remove all rows from waterway.geonames in database $db at port $port.
drop_data()
{
    echo "Dropping geonames data..."
    psql -q -p "$port" -d "$db" -c "TRUNCATE TABLE waterway.geonames;" \
        || fatal "Failed to drop geonames data in database \"$db\""
}

# Run import-geonames.sql once per country in $CCs, handing the path of
# the unpacked text file to psql as the variable "filename" (presumably
# referenced by the SQL script, which is not part of this file).
import_data()
{
    local cc fqfn

    for cc in $CCs ; do
        fqfn="${datadir}/${cc}.txt"
        echo "Importing $fqfn into database..."
        # ON_ERROR_STOP is spelled out explicitly; a failing import now
        # aborts the script instead of being silently skipped.
        psql -qtv ON_ERROR_STOP=1 -p "$port" -d "$db" \
             -v filename="$fqfn" \
             -f "$MYHOME/import-geonames.sql" \
            || fatal "Import of $fqfn failed"
    done
}

# Parse options:

OPTS=$(getopt \
    -l help,db:,port:,datadir:,drop \
    -o D:d:p: -n "$ME" -- "$@") || { usage ; exit 1 ; }

# getopt emits a shell-quoted argument list; eval is the documented way
# to load it back into the positional parameters.
eval set -- "$OPTS"

while true ; do
    case "$1" in
        --db|-d)
            db="$2"
            shift 2
            ;;
        --port|-p)
            port="$2"
            shift 2
            ;;
        --datadir|-D)
            datadir="$2"
            shift 2
            ;;
        --drop)
            drop=1
            shift 1
            ;;
        --help)
            { usage ; exit 0 ; }
            ;;
        --)
            shift
            break
            ;;
        *)
            # Not reachable with the option list above, but guards
            # against an endless loop should the two ever diverge.
            fatal "Unhandled option: $1"
            ;;
    esac
done


# Main ------------------------------------------------------------

if [ "$drop" -eq 0 ] ; then
    dl
    unpack
    drop_data
    import_data
else
    drop_data
fi