From aeb9ff37d6683236300b0325d65c6ca0ec66c7ed Mon Sep 17 00:00:00 2001 From: Kuang-che Wu Date: Sun, 7 Dec 2014 14:32:05 +0800 Subject: make sqlite3 full-text search tokenizer configurable. OpenStreetMap is an international project. "icu" or other non-default tokenizer may be more suitable for some locales. For example, "icu" tokenizer is better than the default ("simple") for Chinese. We don't want to force all to recompile sqlite3 in order to use "icu" tokenizer, so make it configurable in taginfo-config.json. --- sources/master/search.sql | 1 + sources/master/update.sh | 3 ++- taginfo-config-example.json | 8 +++++++- 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/sources/master/search.sql b/sources/master/search.sql index f8555a2..9792eba 100644 --- a/sources/master/search.sql +++ b/sources/master/search.sql @@ -18,6 +18,7 @@ ATTACH DATABASE '__DIR__/db/taginfo-db.db' AS db; DROP TABLE IF EXISTS ftsearch; CREATE VIRTUAL TABLE ftsearch USING fts3 ( + tokenize=__TOKENIZER__, key TEXT, value TEXT, count_all INTEGER diff --git a/sources/master/update.sh b/sources/master/update.sh index e4ccaba..91af5b8 100755 --- a/sources/master/update.sh +++ b/sources/master/update.sh @@ -25,8 +25,9 @@ SELECTION_DB=$DIR/selection.db echo "`$DATECMD` Create search database..." +tokenizer=`../../bin/taginfo-config.rb sources.master.tokenizer simple` rm -f $DIR/taginfo-search.db -$M4 --prefix-builtins -D __DIR__=$DIR search.sql | sqlite3 $DIR/taginfo-search.db +$M4 --prefix-builtins -D __DIR__=$DIR -D __TOKENIZER__=$tokenizer search.sql | sqlite3 $DIR/taginfo-search.db echo "`$DATECMD` Create master database..." diff --git a/taginfo-config-example.json b/taginfo-config-example.json index 3936308..2c225b2 100644 --- a/taginfo-config-example.json +++ b/taginfo-config-example.json @@ -95,7 +95,13 @@ // Minimum number of relations per type to make this // relation type "interesting", ie. to make it show // up as a relation type. 
- "min_count_relations_per_type": 100 + "min_count_relations_per_type": 100, + // Tokenizer for sqlite full-text search. Complex or custom + // tokenizers, e.g., icu and unicode61, may be more suitable for + // some locales. You may need a newer sqlite3 or to recompile + // sqlite3 to use those tokenizers. + // See http://www.sqlite.org/fts3.html#tokenizer for details. + "tokenizer": "simple" } }, "logging": { -- cgit v1.2.3