diff options
author | Jochen Topf <jochen@topf.org> | 2015-02-04 19:27:03 +0100 |
---|---|---|
committer | Jochen Topf <jochen@topf.org> | 2015-02-04 19:27:03 +0100 |
commit | 3399c63e6e67ef115ad47272266f1e25aee3bb1f (patch) | |
tree | d684d23ecc595a89b10a3557f7fee241a7440803 | |
parent | 3c885660d95393a1836545ab6ae971fd3311cd6f (diff) | |
parent | aeb9ff37d6683236300b0325d65c6ca0ec66c7ed (diff) | |
download | taginfo-3399c63e6e67ef115ad47272266f1e25aee3bb1f.tar taginfo-3399c63e6e67ef115ad47272266f1e25aee3bb1f.tar.gz |
Merge pull request #99 from kcwu/patch-1
using icu tokenizer in sqlite3 full text search
-rw-r--r-- | sources/master/search.sql | 1 |
-rwxr-xr-x | sources/master/update.sh | 3 |
-rw-r--r-- | taginfo-config-example.json | 8 |
3 files changed, 10 insertions, 2 deletions
```diff
diff --git a/sources/master/search.sql b/sources/master/search.sql
index f8555a2..9792eba 100644
--- a/sources/master/search.sql
+++ b/sources/master/search.sql
@@ -18,6 +18,7 @@ ATTACH DATABASE '__DIR__/db/taginfo-db.db' AS db;
 DROP TABLE IF EXISTS ftsearch;
 CREATE VIRTUAL TABLE ftsearch USING fts3 (
+    tokenize=__TOKENIZER__,
     key TEXT,
     value TEXT,
     count_all INTEGER
diff --git a/sources/master/update.sh b/sources/master/update.sh
index e4ccaba..91af5b8 100755
--- a/sources/master/update.sh
+++ b/sources/master/update.sh
@@ -25,8 +25,9 @@ SELECTION_DB=$DIR/selection.db
 echo "`$DATECMD` Create search database..."
+tokenizer=`../../bin/taginfo-config.rb sources.master.tokenizer simple`
 rm -f $DIR/taginfo-search.db
-$M4 --prefix-builtins -D __DIR__=$DIR search.sql | sqlite3 $DIR/taginfo-search.db
+$M4 --prefix-builtins -D __DIR__=$DIR -D __TOKENIZER__=$tokenizer search.sql | sqlite3 $DIR/taginfo-search.db
 echo "`$DATECMD` Create master database..."
diff --git a/taginfo-config-example.json b/taginfo-config-example.json
index 3936308..2c225b2 100644
--- a/taginfo-config-example.json
+++ b/taginfo-config-example.json
@@ -95,7 +95,13 @@
             // Minimum number of relations per type to make this
             // relation type "interesting", ie. to make it show
             // up as a relation type.
-            "min_count_relations_per_type": 100
+            "min_count_relations_per_type": 100,
+            // Tokenizer for sqlite full-text search. Complex or custom
+            // tokenizers, e.g., icu and unicode61, may be more suitable for
+            // some locales. You may need newer sqlite3 or to recompile
+            // sqlite3 to use those tokenizers.
+            // See http://www.sqlite.org/fts3.html#tokenizer for detail.
+            "tokenizer": "simple"
         }
     },
     "logging": {
```