summaryrefslogtreecommitdiff
path: root/sources/db
diff options
context:
space:
mode:
authorJochen Topf <jochen@topf.org>2010-10-04 18:41:53 +0200
committerJochen Topf <jochen@topf.org>2010-10-04 18:41:53 +0200
commit9918c2c4c266a29848ce39fe2496876c66c3a48e (patch)
treeb49fe450d33dcb3c30b37f7bff68fbb475ecec66 /sources/db
downloadtaginfo-9918c2c4c266a29848ce39fe2496876c66c3a48e.tar
taginfo-9918c2c4c266a29848ce39fe2496876c66c3a48e.tar.gz
first commit
Diffstat (limited to 'sources/db')
-rwxr-xr-xsources/db/osmium_tagstatsbin0 -> 135169 bytes
-rw-r--r--sources/db/post.sql55
-rw-r--r--sources/db/pre.sql76
-rwxr-xr-xsources/db/update.sh37
4 files changed, 168 insertions, 0 deletions
diff --git a/sources/db/osmium_tagstats b/sources/db/osmium_tagstats
new file mode 100755
index 0000000..618ee6d
--- /dev/null
+++ b/sources/db/osmium_tagstats
Binary files differ
diff --git a/sources/db/post.sql b/sources/db/post.sql
new file mode 100644
index 0000000..2316657
--- /dev/null
+++ b/sources/db/post.sql
@@ -0,0 +1,55 @@
+--
+-- Taginfo source: Database
+--
+-- post.sql
+--
+
+.bail ON
+
+PRAGMA journal_mode = OFF;
+PRAGMA synchronous = OFF;
+PRAGMA count_changes = OFF;
+PRAGMA temp_store = MEMORY;
+PRAGMA cache_size = 5000000;
+
+ATTACH DATABASE '__DIR__/count.db' AS count;
+INSERT INTO stats SELECT * FROM count.stats;
+DETACH DATABASE count;
+
+CREATE UNIQUE INDEX keys_key_idx ON keys (key);
+CREATE INDEX tags_key_idx ON tags (key);
+-- CREATE UNIQUE INDEX tags_key_value_idx ON tags (key, value);
+CREATE INDEX keypairs_key1_idx ON keypairs (key1);
+CREATE INDEX keypairs_key2_idx ON keypairs (key2);
+CREATE UNIQUE INDEX key_distributions_key_idx ON key_distributions (key);
+
+INSERT INTO stats (key, value) SELECT 'num_keys', count(*) FROM keys;
+INSERT INTO stats (key, value) SELECT 'num_keys_on_nodes', count(*) FROM keys WHERE count_nodes > 0;
+INSERT INTO stats (key, value) SELECT 'num_keys_on_ways', count(*) FROM keys WHERE count_ways > 0;
+INSERT INTO stats (key, value) SELECT 'num_keys_on_relations', count(*) FROM keys WHERE count_relations > 0;
+
+INSERT INTO stats (key, value) SELECT 'num_tags', count(*) FROM tags;
+INSERT INTO stats (key, value) SELECT 'num_tags_on_nodes', count(*) FROM tags WHERE count_nodes > 0;
+INSERT INTO stats (key, value) SELECT 'num_tags_on_ways', count(*) FROM tags WHERE count_ways > 0;
+INSERT INTO stats (key, value) SELECT 'num_tags_on_relations', count(*) FROM tags WHERE count_relations > 0;
+
+INSERT INTO stats (key, value) SELECT 'num_keypairs', count(*) FROM keypairs;
+INSERT INTO stats (key, value) SELECT 'num_keypairs_on_nodes', count(*) FROM keypairs WHERE count_nodes > 0;
+INSERT INTO stats (key, value) SELECT 'num_keypairs_on_ways', count(*) FROM keypairs WHERE count_ways > 0;
+INSERT INTO stats (key, value) SELECT 'num_keypairs_on_relations', count(*) FROM keypairs WHERE count_relations > 0;
+
+UPDATE keys SET prevalent_values=(
+ SELECT group_concat(value, '|') FROM (
+ SELECT value FROM tags t
+ WHERE t.key = keys.key
+ AND t.count_all > keys.count_all / 100
+ ORDER BY t.count_all DESC
+ LIMIT 10
+
+ )
+ ) WHERE values_all < 1000;
+
+ANALYZE;
+
+UPDATE meta SET update_end=datetime('now');
+
diff --git a/sources/db/pre.sql b/sources/db/pre.sql
new file mode 100644
index 0000000..1bb00ea
--- /dev/null
+++ b/sources/db/pre.sql
@@ -0,0 +1,76 @@
+--
+-- Taginfo source: Database
+--
+-- pre.sql
+--
+
+.bail ON
+
+DROP TABLE IF EXISTS meta;
+
+CREATE TABLE meta (
+ source_id TEXT,
+ source_name TEXT,
+ update_start TEXT,
+ update_end TEXT,
+ data_until TEXT
+);
+
+INSERT INTO meta (source_id, source_name, update_start) SELECT 'db', 'Database', datetime('now');
+
+DROP TABLE IF EXISTS stats;
+
+CREATE TABLE stats (
+ key TEXT,
+ value INT64
+);
+
+DROP TABLE IF EXISTS keys;
+
+CREATE TABLE keys (
+ key VARCHAR,
+ count_all INTEGER,
+ count_nodes INTEGER,
+ count_ways INTEGER,
+ count_relations INTEGER,
+ values_all INTEGER,
+ values_nodes INTEGER,
+ values_ways INTEGER,
+ values_relations INTEGER,
+ users_all INTEGER,
+ users_nodes INTEGER,
+ users_ways INTEGER,
+ users_relations INTEGER,
+ grids INTEGER,
+ prevalent_values TEXT
+);
+
+DROP TABLE IF EXISTS key_distributions;
+
+CREATE TABLE key_distributions (
+ key VARCHAR,
+ png BLOB
+);
+
+DROP TABLE IF EXISTS tags;
+
+CREATE TABLE tags (
+ key VARCHAR,
+ value VARCHAR,
+ count_all INTEGER,
+ count_nodes INTEGER,
+ count_ways INTEGER,
+ count_relations INTEGER
+);
+
+DROP TABLE IF EXISTS keypairs;
+
+CREATE TABLE keypairs (
+ key1 VARCHAR,
+ key2 VARCHAR,
+ count_all INTEGER,
+ count_nodes INTEGER,
+ count_ways INTEGER,
+ count_relations INTEGER
+);
+
diff --git a/sources/db/update.sh b/sources/db/update.sh
new file mode 100755
index 0000000..3a48e74
--- /dev/null
+++ b/sources/db/update.sh
@@ -0,0 +1,37 @@
+#!/bin/sh
+#
+# Taginfo source: DB
+#
+# update.sh DIR
+#
+
+set -e
+
+DIR=$1
+
+if [ "x" = "x$DIR" ]; then
+ echo "Usage: update.sh DIR"
+ exit 1
+fi
+
+echo -n "Start db: "; date
+
+DATABASE=$DIR/taginfo-db.db
+
+rm -f $DATABASE
+rm -f $DIR/count.db
+
+echo "Running pre.sql..."
+sqlite3 $DATABASE <pre.sql
+
+echo "Running count..."
+HERE=`pwd`
+cd $DIR
+bzcat $DIR/planet.osm.bz2 | $HERE/osmium_tagstats -
+cd $HERE
+
+echo "Running post.sql..."
+perl -pe "s|__DIR__|$DIR|" post.sql | sqlite3 $DATABASE
+
+echo -n "Done db: "; date
+