summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Kuang-che Wu <kcwu@csie.org> 2014-12-07 14:32:05 +0800
committer: Kuang-che Wu <kcwu@csie.org> 2015-02-05 02:22:29 +0800
commit: aeb9ff37d6683236300b0325d65c6ca0ec66c7ed (patch)
tree: d684d23ecc595a89b10a3557f7fee241a7440803
parent: 3c885660d95393a1836545ab6ae971fd3311cd6f (diff)
download: taginfo-aeb9ff37d6683236300b0325d65c6ca0ec66c7ed.tar
download: taginfo-aeb9ff37d6683236300b0325d65c6ca0ec66c7ed.tar.gz
make sqlite3 full-text search tokenizer configurable.
OpenStreetMap is an international project, and "icu" or another non-default tokenizer may be more suitable for some locales. For example, the "icu" tokenizer works better than the default ("simple") tokenizer for Chinese. We don't want to force everyone to recompile sqlite3 in order to use the "icu" tokenizer, so make the tokenizer configurable in taginfo-config.json.
-rw-r--r-- sources/master/search.sql 1
-rwxr-xr-x sources/master/update.sh 3
-rw-r--r-- taginfo-config-example.json 8
3 files changed, 10 insertions, 2 deletions
diff --git a/sources/master/search.sql b/sources/master/search.sql
index f8555a2..9792eba 100644
--- a/sources/master/search.sql
+++ b/sources/master/search.sql
@@ -18,6 +18,7 @@ ATTACH DATABASE '__DIR__/db/taginfo-db.db' AS db;
DROP TABLE IF EXISTS ftsearch;
CREATE VIRTUAL TABLE ftsearch USING fts3 (
+ tokenize=__TOKENIZER__,
key TEXT,
value TEXT,
count_all INTEGER
diff --git a/sources/master/update.sh b/sources/master/update.sh
index e4ccaba..91af5b8 100755
--- a/sources/master/update.sh
+++ b/sources/master/update.sh
@@ -25,8 +25,9 @@ SELECTION_DB=$DIR/selection.db
echo "`$DATECMD` Create search database..."
+tokenizer=`../../bin/taginfo-config.rb sources.master.tokenizer simple`
rm -f $DIR/taginfo-search.db
-$M4 --prefix-builtins -D __DIR__=$DIR search.sql | sqlite3 $DIR/taginfo-search.db
+$M4 --prefix-builtins -D __DIR__=$DIR -D __TOKENIZER__=$tokenizer search.sql | sqlite3 $DIR/taginfo-search.db
echo "`$DATECMD` Create master database..."
diff --git a/taginfo-config-example.json b/taginfo-config-example.json
index 3936308..2c225b2 100644
--- a/taginfo-config-example.json
+++ b/taginfo-config-example.json
@@ -95,7 +95,13 @@
// Minimum number of relations per type to make this
// relation type "interesting", ie. to make it show
// up as a relation type.
- "min_count_relations_per_type": 100
+ "min_count_relations_per_type": 100,
+ // Tokenizer for sqlite full-text search. Complex or custom
+ // tokenizers, e.g., icu and unicode61, may be more suitable for
+ // some locales. You may need newer sqlite3 or to recompile
+ // sqlite3 to use those tokenizers.
+ // See http://www.sqlite.org/fts3.html#tokenizer for details.
+ "tokenizer": "simple"
}
},
"logging": {