diff options
author | Kuang-che Wu <kcwu@csie.org> | 2014-12-07 14:32:05 +0800 |
---|---|---|
committer | Kuang-che Wu <kcwu@csie.org> | 2015-02-05 02:22:29 +0800 |
commit | aeb9ff37d6683236300b0325d65c6ca0ec66c7ed (patch) | |
tree | d684d23ecc595a89b10a3557f7fee241a7440803 | |
parent | 3c885660d95393a1836545ab6ae971fd3311cd6f (diff) | |
download | taginfo-aeb9ff37d6683236300b0325d65c6ca0ec66c7ed.tar taginfo-aeb9ff37d6683236300b0325d65c6ca0ec66c7ed.tar.gz |
Make the sqlite3 full-text search tokenizer configurable.
OpenStreetMap is an international project. "icu" or other non-default
tokenizers may be more suitable for some locales. For example, the "icu"
tokenizer is better than the default ("simple") for Chinese.
We don't want to force everyone to recompile sqlite3 in order to use the
"icu" tokenizer, so make it configurable in taginfo-config.json.
-rw-r--r-- | sources/master/search.sql | 1 | ||||
-rwxr-xr-x | sources/master/update.sh | 3 | ||||
-rw-r--r-- | taginfo-config-example.json | 8 |
3 files changed, 10 insertions, 2 deletions
diff --git a/sources/master/search.sql b/sources/master/search.sql index f8555a2..9792eba 100644 --- a/sources/master/search.sql +++ b/sources/master/search.sql @@ -18,6 +18,7 @@ ATTACH DATABASE '__DIR__/db/taginfo-db.db' AS db; DROP TABLE IF EXISTS ftsearch; CREATE VIRTUAL TABLE ftsearch USING fts3 ( + tokenize=__TOKENIZER__, key TEXT, value TEXT, count_all INTEGER diff --git a/sources/master/update.sh b/sources/master/update.sh index e4ccaba..91af5b8 100755 --- a/sources/master/update.sh +++ b/sources/master/update.sh @@ -25,8 +25,9 @@ SELECTION_DB=$DIR/selection.db echo "`$DATECMD` Create search database..." +tokenizer=`../../bin/taginfo-config.rb sources.master.tokenizer simple` rm -f $DIR/taginfo-search.db -$M4 --prefix-builtins -D __DIR__=$DIR search.sql | sqlite3 $DIR/taginfo-search.db +$M4 --prefix-builtins -D __DIR__=$DIR -D __TOKENIZER__=$tokenizer search.sql | sqlite3 $DIR/taginfo-search.db echo "`$DATECMD` Create master database..." diff --git a/taginfo-config-example.json b/taginfo-config-example.json index 3936308..2c225b2 100644 --- a/taginfo-config-example.json +++ b/taginfo-config-example.json @@ -95,7 +95,13 @@ // Minimum number of relations per type to make this // relation type "interesting", ie. to make it show // up as a relation type. - "min_count_relations_per_type": 100 + "min_count_relations_per_type": 100, + // Tokenizer for sqlite full-text search. Complex or custom + // tokenizers, e.g., icu and unicode61, may be more suitable for + // some locales. You may need newer sqlite3 or to recompile + // sqlite3 to use those tokenizers. + // See http://www.sqlite.org/fts3.html#tokenizer for detail. + "tokenizer": "simple" } }, "logging": { |