diff options
-rwxr-xr-x | sources/db/update.sh | 3 | ||||
-rw-r--r-- | taginfo-config-example.json | 2 | ||||
-rw-r--r-- | tagstats/tagstats.cpp | 16 | ||||
-rw-r--r-- | tagstats/tagstats_handler.hpp | 7 | ||||
-rwxr-xr-x | tagstats/test_tagstats.sh | 2 |
5 files changed, 21 insertions, 9 deletions
diff --git a/sources/db/update.sh b/sources/db/update.sh index 1f00088..71abd9d 100755 --- a/sources/db/update.sh +++ b/sources/db/update.sh @@ -43,6 +43,7 @@ bottom=`../../bin/taginfo-config.rb geodistribution.bottom` left=`../../bin/taginfo-config.rb geodistribution.left` width=`../../bin/taginfo-config.rb geodistribution.width` height=`../../bin/taginfo-config.rb geodistribution.height` +min_tag_combination_count=`../../bin/taginfo-config.rb sources.master.min_tag_combination_count 1000` TAGSTATS=`../../bin/taginfo-config.rb sources.db.tagstats` if [ "x" = "x$TAGSTATS" ]; then @@ -50,7 +51,7 @@ if [ "x" = "x$TAGSTATS" ]; then fi #TAGSTATS="valgrind --leak-check=full --show-reachable=yes $TAGSTATS" -$TAGSTATS --tags $DIR/interesting_tags.lst --relation-types $DIR/interesting_relation_types.lst --left=$left --bottom=$bottom --top=$top --right=$right --width=$width --height=$height $PLANETFILE $DATABASE +$TAGSTATS --tags $DIR/interesting_tags.lst --min-tag-combination-count=$min_tag_combination_count --relation-types $DIR/interesting_relation_types.lst --left=$left --bottom=$bottom --top=$top --right=$right --width=$width --height=$height $PLANETFILE $DATABASE echo "`$DATECMD` Running update_characters... " ./update_characters.rb $DIR diff --git a/taginfo-config-example.json b/taginfo-config-example.json index cea3424..0fcba46 100644 --- a/taginfo-config-example.json +++ b/taginfo-config-example.json @@ -64,6 +64,8 @@ // potentially "interesting", ie. create tag combination // statistics for it. "min_count_tags": 10000, + // Tag combinations not appearing this often are not written to database. + "min_tag_combination_count": 1000, // Minimum number of relations per type to make this // relation type "interesting", ie. to make it show // up as a relation type. diff --git a/tagstats/tagstats.cpp b/tagstats/tagstats.cpp index 2dcbfb9..91b63ed 100644 --- a/tagstats/tagstats.cpp +++ b/tagstats/tagstats.cpp @@ -59,7 +59,9 @@ void print_help() { << "\nOptions:\n" \ << " -H, --help This help message\n"; #ifdef TAGSTATS_COUNT_TAG_COMBINATIONS - std::cout << " -T, --tags=FILENAME File with tags we are interested in\n"; + std::cout << " -T, --tags=FILENAME File with tags we are interested in\n" \ + << " -m, --min-tag-combination-count=N Tag combinations not appearing this often\n" \ + << " are not written to database\n"; #endif // TAGSTATS_COUNT_TAG_COMBINATIONS std::cout << " -R, --relation-types=FILENAME File with relation types we are interested in\n" \ << " -t, --top=NUMBER Top of bounding box for distribution images\n" \ @@ -75,7 +77,8 @@ int main(int argc, char *argv[]) { static struct option long_options[] = { {"help", no_argument, 0, 'H'}, #ifdef TAGSTATS_COUNT_TAG_COMBINATIONS - {"tags", required_argument, 0, 'T'}, + {"tags", required_argument, 0, 'T'}, + {"min-tag-combination-count", required_argument, 0, 'm'}, #endif // TAGSTATS_COUNT_TAG_COMBINATIONS {"relation-types", required_argument, 0, 'R'}, {"top", required_argument, 0, 't'}, @@ -98,10 +101,12 @@ int main(int argc, char *argv[]) { unsigned int width = 360; unsigned int height = 180; + unsigned int min_tag_combination_count = 1000; + while (true) { int c = getopt_long(argc, argv, #ifdef TAGSTATS_COUNT_TAG_COMBINATIONS - "dHT:R:t:r:b:l:w:h:", + "dHR:t:r:b:l:w:h:T:m:", #else "dHR:t:r:b:l:w:h:", #endif // TAGSTATS_COUNT_TAG_COMBINATIONS @@ -118,6 +123,9 @@ int main(int argc, char *argv[]) { case 'T': tags_list = optarg; break; + case 'm': + min_tag_combination_count = atoi(optarg); + break; #endif // TAGSTATS_COUNT_TAG_COMBINATIONS case 'R': relation_type_list = optarg; @@ -154,7 +162,7 @@ int main(int argc, char *argv[]) { Osmium::OSMFile infile(argv[optind]); Sqlite::Database db(argv[optind+1]); MapToInt<rough_position_t> map_to_int(left, bottom, right, top, width, height); - TagStatsHandler handler(db, tags_list, relation_type_list, map_to_int); + TagStatsHandler handler(db, tags_list, relation_type_list, map_to_int, min_tag_combination_count); Osmium::Input::read(infile, handler); } diff --git a/tagstats/tagstats_handler.hpp b/tagstats/tagstats_handler.hpp index d3ea136..4069dd1 100644 --- a/tagstats/tagstats_handler.hpp +++ b/tagstats/tagstats_handler.hpp @@ -243,7 +243,7 @@ class TagStatsHandler : public Osmium::Handler::Base { * Tag combination not appearing at least this often are not written * to database. */ - static const unsigned int min_tag_combination_count = 1000; + unsigned int m_min_tag_combination_count; time_t timer; @@ -446,8 +446,9 @@ class TagStatsHandler : public Osmium::Handler::Base { public: - TagStatsHandler(Sqlite::Database& database, const std::string& tags_list, const std::string& relation_type_list, MapToInt<rough_position_t>& map_to_int) : + TagStatsHandler(Sqlite::Database& database, const std::string& tags_list, const std::string& relation_type_list, MapToInt<rough_position_t>& map_to_int, unsigned int min_tag_combination_count) : Base(), + m_min_tag_combination_count(min_tag_combination_count), m_max_timestamp(0), m_string_store(string_store_size), m_database(database), @@ -691,7 +692,7 @@ public: for (combination_hash_map_t::const_iterator it = stat->m_key_value_combination_hash.begin(); it != stat->m_key_value_combination_hash.end(); ++it) { const Counter* s = &(it->second); - if (s->all() >= min_tag_combination_count) { + if (s->all() >= m_min_tag_combination_count) { std::vector<std::string> kv2; boost::split(kv2, it->first, boost::is_any_of("=")); kv2.push_back(""); // if there is no = in key, make sure there is an empty value diff --git a/tagstats/test_tagstats.sh b/tagstats/test_tagstats.sh index d07b5b7..c699d55 100755 --- a/tagstats/test_tagstats.sh +++ b/tagstats/test_tagstats.sh @@ -21,5 +21,5 @@ ulimit -c 1000000000 rm -f core #./tagstats --left=5.5 --bottom=47 --right=15 --top=55 --width=200 --height=320 $OSMFILE $DATABASE -./tagstats --tags test_tags.txt --relation-types test_relation_types.txt $OSMFILE $DATABASE +./tagstats --tags test_tags.txt --min-tag-combination-count=100 --relation-types test_relation_types.txt $OSMFILE $DATABASE |