summary | refs | log | tree | commit | diff
path: root/sources/master
diff options
context:
space:
mode:
author: Jochen Topf <jochen@topf.org>, 2014-05-13 10:22:49 +0200
committer: Jochen Topf <jochen@topf.org>, 2014-05-13 10:22:49 +0200
commit: 746368e5bb6e5e5536f7657f3f87e9b7c11d8dc5 (patch)
tree: 13fa5d61bf19e32c7e84fc8382ae872c14062c4a /sources/master
parent: b9f4796b8b2a4537b34337dc56bec475a92543e5 (diff)
download: taginfo-746368e5bb6e5e5536f7657f3f87e9b7c11d8dc5.tar
taginfo-746368e5bb6e5e5536f7657f3f87e9b7c11d8dc5.tar.gz
Add selection database which will replace *.lst files.
The *.lst files generated by the update script are problematic because they don't work when special characters like "line feeds" are in tags. This commit adds creation of a database called "selection.db" that contains all this data, but it is not used yet. Adds dependency on m4 program.
Diffstat (limited to 'sources/master')
-rw-r--r-- sources/master/selection.sql 69
-rwxr-xr-x sources/master/update.sh 10
2 files changed, 79 insertions, 0 deletions
diff --git a/sources/master/selection.sql b/sources/master/selection.sql
new file mode 100644
index 0000000..7793a65
--- /dev/null
+++ b/sources/master/selection.sql
@@ -0,0 +1,69 @@
+-- ============================================================================
+--
+-- Taginfo
+--
+-- selection.sql
+--
+-- This database contains a selection of often used tags etc.
+--
+-- It is used in the next taginfo run to create some statistics, maps, etc.
+-- only for those selected data.
+--
+-- ============================================================================
+
+-- Stop processing this script as soon as one statement fails.
+.bail ON
+
+-- ============================================================================
+
+-- Attach the source databases under short schema names. The DIR placeholder
+-- in each path is substituted by the update script before sqlite3 reads
+-- this file.
+-- NOTE(review): only the db schema is queried below; wiki, josm and potlatch
+-- are attached but not referenced in this file - confirm they are needed.
+ATTACH DATABASE '__DIR__/db/taginfo-db.db' AS db;
+ATTACH DATABASE '__DIR__/wiki/taginfo-wiki.db' AS wiki;
+ATTACH DATABASE '__DIR__/josm/taginfo-josm.db' AS josm;
+ATTACH DATABASE '__DIR__/potlatch/taginfo-potlatch.db' AS potlatch;
+
+-- ============================================================================
+
+-- interesting_tags: keys and key/value pairs that occur more often than the
+-- configured minimum count. A row with a NULL value selects the key as a
+-- whole; a row with a value selects that one key/value combination.
+DROP TABLE IF EXISTS interesting_tags;
+CREATE TABLE interesting_tags (
+ key TEXT,
+ value TEXT
+);
+
+-- MIN_COUNT_TAGS setting: sources.master.min_count_tags
+INSERT INTO interesting_tags (key, value)
+ SELECT DISTINCT key, NULL FROM db.keys WHERE count_all > __MIN_COUNT_TAGS__
+ UNION
+ SELECT key, value FROM db.tags WHERE count_all > __MIN_COUNT_TAGS__;
+
+-- Remove key/value rows (the key-only rows with a NULL value are kept) for
+-- the listed keys and for every key that contains a colon.
+DELETE FROM interesting_tags WHERE key IN ('created_by', 'ele', 'height', 'is_in', 'lanes', 'layer', 'maxspeed', 'name', 'ref', 'width') AND value IS NOT NULL;
+DELETE FROM interesting_tags WHERE value IS NOT NULL AND key LIKE '%:%';
+-- In a LIKE pattern the underscore matches any single character, so it is
+-- escaped here to hit only keys that literally start with fresno_ .
+DELETE FROM interesting_tags WHERE value IS NOT NULL AND key LIKE 'fresno\_%' ESCAPE '\';
+
+-- Collect query planner statistics for the freshly filled table.
+ANALYZE interesting_tags;
+
+-- ============================================================================
+
+-- frequent_tags: key/value pairs whose total count exceeds the
+-- MIN_COUNT_FOR_MAP threshold.
+DROP TABLE IF EXISTS frequent_tags;
+CREATE TABLE frequent_tags (
+ key TEXT,
+ value TEXT
+);
+
+-- MIN_COUNT_FOR_MAP setting: sources.master.min_count_for_map
+INSERT INTO frequent_tags (key, value)
+ SELECT t.key, t.value
+ FROM db.tags AS t
+ WHERE t.count_all > __MIN_COUNT_FOR_MAP__;
+
+-- Collect query planner statistics for the freshly filled table.
+ANALYZE frequent_tags;
+
+-- ============================================================================
+
+-- interesting_relation_types: values of the type key that are found on more
+-- relations than the configured minimum.
+DROP TABLE IF EXISTS interesting_relation_types;
+CREATE TABLE interesting_relation_types (
+ rtype TEXT
+);
+
+-- MIN_COUNT_RELATIONS_PER_TYPE setting: sources.master.min_count_relations_per_type
+INSERT INTO interesting_relation_types (rtype)
+ SELECT t.value
+ FROM db.tags AS t
+ WHERE t.count_relations > __MIN_COUNT_RELATIONS_PER_TYPE__
+ AND t.key = 'type';
+
+-- Collect query planner statistics for the freshly filled table.
+ANALYZE interesting_relation_types;
+
+-- ============================================================================
diff --git a/sources/master/update.sh b/sources/master/update.sh
index b4e80d0..9fc5d63 100755
--- a/sources/master/update.sh
+++ b/sources/master/update.sh
@@ -26,6 +26,7 @@ echo "Running with ruby set as '${EXEC_RUBY}'"
DATABASE=$DIR/taginfo-master.db
HISTORYDB=$DIR/taginfo-history.db
+SELECTION_DB=$DIR/selection.db
echo "`$DATECMD` Create search database..."
@@ -33,6 +34,7 @@ rm -f $DIR/taginfo-search.db
$EXEC_RUBY -pe "\$_.sub!(/__DIR__/, '$DIR')" search.sql | sqlite3 $DIR/taginfo-search.db
rm -f $DATABASE
+rm -f $SELECTION_DB
echo "`$DATECMD` Create master database..."
min_count_tags=`../../bin/taginfo-config.rb sources.master.min_count_tags 10000`
@@ -44,6 +46,14 @@ $EXEC_RUBY -pe "\$_.sub!(/__DIR__/, '$DIR')" interesting_tags.sql | $EXEC_RUBY -
$EXEC_RUBY -pe "\$_.sub!(/__DIR__/, '$DIR')" frequent_tags.sql | $EXEC_RUBY -pe "\$_.sub!(/__MIN_COUNT_FOR_MAP__/, '$min_count_for_map')" | sqlite3 $DATABASE
$EXEC_RUBY -pe "\$_.sub!(/__DIR__/, '$DIR')" interesting_relation_types.sql | $EXEC_RUBY -pe "\$_.sub!(/__MIN_COUNT_RELATIONS_PER_TYPE__/, '$min_count_relations_per_type')" | sqlite3 $DATABASE
+echo "`$DATECMD` Create selection database..."
+
+# Expand the placeholders in selection.sql with m4 and pipe the result into
+# sqlite3 to build the selection database.
+# -P prefixes every m4 builtin with "m4_", so ordinary SQL words such as
+# substr, index or len can never be mistaken for macro calls.
+# All expansions are quoted so that paths containing whitespace or glob
+# characters reach m4 and sqlite3 as single arguments.
+m4 -P \
+ -D __DIR__="$DIR" \
+ -D __MIN_COUNT_FOR_MAP__="$min_count_for_map" \
+ -D __MIN_COUNT_TAGS__="$min_count_tags" \
+ -D __MIN_COUNT_RELATIONS_PER_TYPE__="$min_count_relations_per_type" \
+ selection.sql | sqlite3 "$SELECTION_DB"
+
echo "`$DATECMD` Updating history database..."
if [ ! -e $HISTORYDB ]; then
sqlite3 $HISTORYDB < history_init.sql