summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Jochen Topf <jochen@topf.org> 2015-02-04 19:27:03 +0100
committer Jochen Topf <jochen@topf.org> 2015-02-04 19:27:03 +0100
commit 3399c63e6e67ef115ad47272266f1e25aee3bb1f (patch)
tree d684d23ecc595a89b10a3557f7fee241a7440803
parent 3c885660d95393a1836545ab6ae971fd3311cd6f (diff)
parent aeb9ff37d6683236300b0325d65c6ca0ec66c7ed (diff)
download taginfo-3399c63e6e67ef115ad47272266f1e25aee3bb1f.tar
taginfo-3399c63e6e67ef115ad47272266f1e25aee3bb1f.tar.gz
Merge pull request #99 from kcwu/patch-1
using icu tokenizer in sqlite3 full text search
-rw-r--r-- sources/master/search.sql 1
-rwxr-xr-x sources/master/update.sh 3
-rw-r--r-- taginfo-config-example.json 8
3 files changed, 10 insertions, 2 deletions
diff --git a/sources/master/search.sql b/sources/master/search.sql
index f8555a2..9792eba 100644
--- a/sources/master/search.sql
+++ b/sources/master/search.sql
@@ -18,6 +18,7 @@ ATTACH DATABASE '__DIR__/db/taginfo-db.db' AS db;
DROP TABLE IF EXISTS ftsearch;
CREATE VIRTUAL TABLE ftsearch USING fts3 (
+ tokenize=__TOKENIZER__,
key TEXT,
value TEXT,
count_all INTEGER
diff --git a/sources/master/update.sh b/sources/master/update.sh
index e4ccaba..91af5b8 100755
--- a/sources/master/update.sh
+++ b/sources/master/update.sh
@@ -25,8 +25,9 @@ SELECTION_DB=$DIR/selection.db
echo "`$DATECMD` Create search database..."
+tokenizer=`../../bin/taginfo-config.rb sources.master.tokenizer simple`
rm -f $DIR/taginfo-search.db
-$M4 --prefix-builtins -D __DIR__=$DIR search.sql | sqlite3 $DIR/taginfo-search.db
+$M4 --prefix-builtins -D __DIR__=$DIR -D __TOKENIZER__=$tokenizer search.sql | sqlite3 $DIR/taginfo-search.db
echo "`$DATECMD` Create master database..."
diff --git a/taginfo-config-example.json b/taginfo-config-example.json
index 3936308..2c225b2 100644
--- a/taginfo-config-example.json
+++ b/taginfo-config-example.json
@@ -95,7 +95,13 @@
// Minimum number of relations per type to make this
// relation type "interesting", ie. to make it show
// up as a relation type.
- "min_count_relations_per_type": 100
+ "min_count_relations_per_type": 100,
+ // Tokenizer for sqlite full-text search. Complex or custom
+ // tokenizers, e.g., icu and unicode61, may be more suitable for
+ // some locales. You may need newer sqlite3 or to recompile
+ // sqlite3 to use those tokenizers.
+ // See http://www.sqlite.org/fts3.html#tokenizer for detail.
+ "tokenizer": "simple"
}
},
"logging": {