about summary refs log tree commit diff
diff options
context:
space:
mode:
-rwxr-xr-x sources/db/update.sh 3
-rw-r--r-- taginfo-config-example.json 2
-rw-r--r-- tagstats/tagstats.cpp 16
-rw-r--r-- tagstats/tagstats_handler.hpp 7
-rwxr-xr-x tagstats/test_tagstats.sh 2
5 files changed, 21 insertions, 9 deletions
diff --git a/sources/db/update.sh b/sources/db/update.sh
index 1f00088..71abd9d 100755
--- a/sources/db/update.sh
+++ b/sources/db/update.sh
@@ -43,6 +43,7 @@ bottom=`../../bin/taginfo-config.rb geodistribution.bottom`
left=`../../bin/taginfo-config.rb geodistribution.left`
width=`../../bin/taginfo-config.rb geodistribution.width`
height=`../../bin/taginfo-config.rb geodistribution.height`
+min_tag_combination_count=`../../bin/taginfo-config.rb sources.master.min_tag_combination_count 1000`
TAGSTATS=`../../bin/taginfo-config.rb sources.db.tagstats`
if [ "x" = "x$TAGSTATS" ]; then
@@ -50,7 +51,7 @@ if [ "x" = "x$TAGSTATS" ]; then
fi
#TAGSTATS="valgrind --leak-check=full --show-reachable=yes $TAGSTATS"
-$TAGSTATS --tags $DIR/interesting_tags.lst --relation-types $DIR/interesting_relation_types.lst --left=$left --bottom=$bottom --top=$top --right=$right --width=$width --height=$height $PLANETFILE $DATABASE
+$TAGSTATS --tags $DIR/interesting_tags.lst --min-tag-combination-count=$min_tag_combination_count --relation-types $DIR/interesting_relation_types.lst --left=$left --bottom=$bottom --top=$top --right=$right --width=$width --height=$height $PLANETFILE $DATABASE
echo "`$DATECMD` Running update_characters... "
./update_characters.rb $DIR
diff --git a/taginfo-config-example.json b/taginfo-config-example.json
index cea3424..0fcba46 100644
--- a/taginfo-config-example.json
+++ b/taginfo-config-example.json
@@ -64,6 +64,8 @@
// potentially "interesting", ie. create tag combination
// statistics for it.
"min_count_tags": 10000,
+ // Tag combinations not appearing this often are not written to database.
+ "min_tag_combination_count": 1000,
// Minimum number of relations per type to make this
// relation type "interesting", ie. to make it show
// up as a relation type.
diff --git a/tagstats/tagstats.cpp b/tagstats/tagstats.cpp
index 2dcbfb9..91b63ed 100644
--- a/tagstats/tagstats.cpp
+++ b/tagstats/tagstats.cpp
@@ -59,7 +59,9 @@ void print_help() {
<< "\nOptions:\n" \
<< " -H, --help This help message\n";
#ifdef TAGSTATS_COUNT_TAG_COMBINATIONS
- std::cout << " -T, --tags=FILENAME File with tags we are interested in\n";
+ std::cout << " -T, --tags=FILENAME File with tags we are interested in\n" \
+ << " -m, --min-tag-combination-count=N Tag combinations not appearing this often\n" \
+ << " are not written to database\n";
#endif // TAGSTATS_COUNT_TAG_COMBINATIONS
std::cout << " -R, --relation-types=FILENAME File with relation types we are interested in\n" \
<< " -t, --top=NUMBER Top of bounding box for distribution images\n" \
@@ -75,7 +77,8 @@ int main(int argc, char *argv[]) {
static struct option long_options[] = {
{"help", no_argument, 0, 'H'},
#ifdef TAGSTATS_COUNT_TAG_COMBINATIONS
- {"tags", required_argument, 0, 'T'},
+ {"tags", required_argument, 0, 'T'},
+ {"min-tag-combination-count", required_argument, 0, 'm'},
#endif // TAGSTATS_COUNT_TAG_COMBINATIONS
{"relation-types", required_argument, 0, 'R'},
{"top", required_argument, 0, 't'},
@@ -98,10 +101,12 @@ int main(int argc, char *argv[]) {
unsigned int width = 360;
unsigned int height = 180;
+ unsigned int min_tag_combination_count = 1000;
+
while (true) {
int c = getopt_long(argc, argv,
#ifdef TAGSTATS_COUNT_TAG_COMBINATIONS
- "dHT:R:t:r:b:l:w:h:",
+ "dHR:t:r:b:l:w:h:T:m:",
#else
"dHR:t:r:b:l:w:h:",
#endif // TAGSTATS_COUNT_TAG_COMBINATIONS
@@ -118,6 +123,9 @@ int main(int argc, char *argv[]) {
case 'T':
tags_list = optarg;
break;
+ case 'm':
+ min_tag_combination_count = atoi(optarg);
+ break;
#endif // TAGSTATS_COUNT_TAG_COMBINATIONS
case 'R':
relation_type_list = optarg;
@@ -154,7 +162,7 @@ int main(int argc, char *argv[]) {
Osmium::OSMFile infile(argv[optind]);
Sqlite::Database db(argv[optind+1]);
MapToInt<rough_position_t> map_to_int(left, bottom, right, top, width, height);
- TagStatsHandler handler(db, tags_list, relation_type_list, map_to_int);
+ TagStatsHandler handler(db, tags_list, relation_type_list, map_to_int, min_tag_combination_count);
Osmium::Input::read(infile, handler);
}
diff --git a/tagstats/tagstats_handler.hpp b/tagstats/tagstats_handler.hpp
index d3ea136..4069dd1 100644
--- a/tagstats/tagstats_handler.hpp
+++ b/tagstats/tagstats_handler.hpp
@@ -243,7 +243,7 @@ class TagStatsHandler : public Osmium::Handler::Base {
* Tag combination not appearing at least this often are not written
* to database.
*/
- static const unsigned int min_tag_combination_count = 1000;
+ unsigned int m_min_tag_combination_count;
time_t timer;
@@ -446,8 +446,9 @@ class TagStatsHandler : public Osmium::Handler::Base {
public:
- TagStatsHandler(Sqlite::Database& database, const std::string& tags_list, const std::string& relation_type_list, MapToInt<rough_position_t>& map_to_int) :
+ TagStatsHandler(Sqlite::Database& database, const std::string& tags_list, const std::string& relation_type_list, MapToInt<rough_position_t>& map_to_int, unsigned int min_tag_combination_count) :
Base(),
+ m_min_tag_combination_count(min_tag_combination_count),
m_max_timestamp(0),
m_string_store(string_store_size),
m_database(database),
@@ -691,7 +692,7 @@ public:
for (combination_hash_map_t::const_iterator it = stat->m_key_value_combination_hash.begin(); it != stat->m_key_value_combination_hash.end(); ++it) {
const Counter* s = &(it->second);
- if (s->all() >= min_tag_combination_count) {
+ if (s->all() >= m_min_tag_combination_count) {
std::vector<std::string> kv2;
boost::split(kv2, it->first, boost::is_any_of("="));
kv2.push_back(""); // if there is no = in key, make sure there is an empty value
diff --git a/tagstats/test_tagstats.sh b/tagstats/test_tagstats.sh
index d07b5b7..c699d55 100755
--- a/tagstats/test_tagstats.sh
+++ b/tagstats/test_tagstats.sh
@@ -21,5 +21,5 @@ ulimit -c 1000000000
rm -f core
#./tagstats --left=5.5 --bottom=47 --right=15 --top=55 --width=200 --height=320 $OSMFILE $DATABASE
-./tagstats --tags test_tags.txt --relation-types test_relation_types.txt $OSMFILE $DATABASE
+./tagstats --tags test_tags.txt --min-tag-combination-count=100 --relation-types test_relation_types.txt $OSMFILE $DATABASE