From 1030f60092da4ccc7939d6f5333b740b46642cd9 Mon Sep 17 00:00:00 2001 From: Jochen Topf Date: Fri, 6 Mar 2015 11:22:42 +0100 Subject: Split up sources/db/post.sql. This SQL script needs a long time to run. Split it up so we can start figuring out where the problem is. --- sources/db/post.sql | 48 ---------------------------------------- sources/db/post_indexes.sql | 31 ++++++++++++++++++++++++++ sources/db/post_similar_keys.sql | 45 +++++++++++++++++++++++++++++++++++++ sources/db/update.sh | 6 +++++ 4 files changed, 82 insertions(+), 48 deletions(-) create mode 100644 sources/db/post_indexes.sql create mode 100644 sources/db/post_similar_keys.sql diff --git a/sources/db/post.sql b/sources/db/post.sql index ad58ef6..07f5ca0 100644 --- a/sources/db/post.sql +++ b/sources/db/post.sql @@ -14,54 +14,6 @@ PRAGMA cache_size = 5000000; -- ============================================================================ --- For all keys found to be similar earlier, we get the counts how often they --- appear in the OSM database and store this data in the same table for easy --- access. -UPDATE similar_keys SET count_all1=(SELECT k.count_all FROM keys k WHERE k.key=similar_keys.key1); -UPDATE similar_keys SET count_all2=(SELECT k.count_all FROM keys k WHERE k.key=similar_keys.key2); - -CREATE INDEX similar_keys_key1_idx ON similar_keys (key1); -CREATE INDEX similar_keys_key2_idx ON similar_keys (key2); - -ANALYZE similar_keys; - -DROP TABLE IF EXISTS similar_keys_common_rare; - -CREATE TABLE similar_keys_common_rare ( - key_common VARCHAR, - key_rare VARCHAR, - count_all_common INTEGER DEFAULT 0, - count_all_rare INTEGER DEFAULT 0, - similarity INTEGER -); - -INSERT INTO similar_keys_common_rare (key_common, key_rare, count_all_common, count_all_rare, similarity) - SELECT key1, key2, count_all1, count_all2, similarity - FROM similar_keys WHERE count_all1 >= 1000 AND count_all2 <= 10 AND count_all2 > 0; - -INSERT INTO similar_keys_common_rare (key_common, key_rare, count_all_common, count_all_rare, similarity) - SELECT key2, key1, count_all2, count_all1, similarity - FROM similar_keys WHERE count_all2 >= 1000 AND count_all1 <= 10 AND count_all1 > 0; - --- ============================================================================ - -CREATE UNIQUE INDEX keys_key_idx ON keys (key); -CREATE INDEX tags_key_idx ON tags (key); --- CREATE UNIQUE INDEX tags_key_value_idx ON tags (key, value); -CREATE INDEX key_combinations_key1_idx ON key_combinations (key1); -CREATE INDEX key_combinations_key2_idx ON key_combinations (key2); -CREATE UNIQUE INDEX key_distributions_key_idx ON key_distributions (key, object_type); - -CREATE UNIQUE INDEX tag_distributions_key_value_idx ON tag_distributions (key, value, object_type); - -CREATE INDEX tag_combinations_key1_value1_idx ON tag_combinations (key1, value1); -CREATE INDEX tag_combinations_key2_value2_idx ON tag_combinations (key2, value2); - -CREATE UNIQUE INDEX relation_types_rtype_idx ON relation_types (rtype); -CREATE INDEX relation_roles_rtype_idx ON relation_roles (rtype); - --- ============================================================================ - INSERT INTO stats (key, value) SELECT 'num_keys', count(*) FROM keys; INSERT INTO stats (key, value) SELECT 'num_keys_on_nodes', count(*) FROM keys WHERE count_nodes > 0; INSERT INTO stats (key, value) SELECT 'num_keys_on_ways', count(*) FROM keys WHERE count_ways > 0; diff --git a/sources/db/post_indexes.sql b/sources/db/post_indexes.sql new file mode 100644 index 0000000..768b7f3 --- /dev/null +++ b/sources/db/post_indexes.sql @@ -0,0 +1,31 @@ +-- +-- Taginfo source: Database +-- +-- post_indexes.sql +-- + +.bail ON + +PRAGMA journal_mode = OFF; +PRAGMA synchronous = OFF; +PRAGMA count_changes = OFF; +PRAGMA temp_store = MEMORY; +PRAGMA cache_size = 5000000; + +-- ============================================================================ + +CREATE UNIQUE INDEX keys_key_idx ON keys (key); +CREATE INDEX tags_key_idx ON tags (key); +-- CREATE UNIQUE INDEX tags_key_value_idx ON tags (key, value); +CREATE INDEX key_combinations_key1_idx ON key_combinations (key1); +CREATE INDEX key_combinations_key2_idx ON key_combinations (key2); +CREATE UNIQUE INDEX key_distributions_key_idx ON key_distributions (key, object_type); + +CREATE UNIQUE INDEX tag_distributions_key_value_idx ON tag_distributions (key, value, object_type); + +CREATE INDEX tag_combinations_key1_value1_idx ON tag_combinations (key1, value1); +CREATE INDEX tag_combinations_key2_value2_idx ON tag_combinations (key2, value2); + +CREATE UNIQUE INDEX relation_types_rtype_idx ON relation_types (rtype); +CREATE INDEX relation_roles_rtype_idx ON relation_roles (rtype); + diff --git a/sources/db/post_similar_keys.sql b/sources/db/post_similar_keys.sql new file mode 100644 index 0000000..8ab9ecc --- /dev/null +++ b/sources/db/post_similar_keys.sql @@ -0,0 +1,45 @@ +-- +-- Taginfo source: Database +-- +-- post_similar_keys.sql +-- + +.bail ON + +PRAGMA journal_mode = OFF; +PRAGMA synchronous = OFF; +PRAGMA count_changes = OFF; +PRAGMA temp_store = MEMORY; +PRAGMA cache_size = 5000000; + +-- ============================================================================ + +-- For all keys found to be similar earlier, we get the counts how often they +-- appear in the OSM database and store this data in the same table for easy +-- access. +UPDATE similar_keys SET count_all1=(SELECT k.count_all FROM keys k WHERE k.key=similar_keys.key1); +UPDATE similar_keys SET count_all2=(SELECT k.count_all FROM keys k WHERE k.key=similar_keys.key2); + +CREATE INDEX similar_keys_key1_idx ON similar_keys (key1); +CREATE INDEX similar_keys_key2_idx ON similar_keys (key2); + +ANALYZE similar_keys; + +DROP TABLE IF EXISTS similar_keys_common_rare; + +CREATE TABLE similar_keys_common_rare ( + key_common VARCHAR, + key_rare VARCHAR, + count_all_common INTEGER DEFAULT 0, + count_all_rare INTEGER DEFAULT 0, + similarity INTEGER +); + +INSERT INTO similar_keys_common_rare (key_common, key_rare, count_all_common, count_all_rare, similarity) + SELECT key1, key2, count_all1, count_all2, similarity + FROM similar_keys WHERE count_all1 >= 1000 AND count_all2 <= 10 AND count_all2 > 0; + +INSERT INTO similar_keys_common_rare (key_common, key_rare, count_all_common, count_all_rare, similarity) + SELECT key2, key1, count_all2, count_all1, similarity + FROM similar_keys WHERE count_all2 >= 1000 AND count_all1 <= 10 AND count_all1 > 0; + diff --git a/sources/db/update.sh b/sources/db/update.sh index 3e42094..44f1fa6 100755 --- a/sources/db/update.sh +++ b/sources/db/update.sh @@ -70,9 +70,15 @@ else echo "WARNING: Not running 'similarity', because binary not found. Please compile it." fi +echo "`$DATECMD` Running post_similar_keys.sql... " +sqlite3 $DATABASE