summaryrefslogtreecommitdiff
path: root/tagstats
diff options
context:
space:
mode:
authorJochen Topf <jochen@topf.org>2014-05-13 15:43:13 +0200
committerJochen Topf <jochen@topf.org>2014-05-13 15:43:13 +0200
commit5167802ba9e30f17ea71e0783c475764fdb8ec64 (patch)
treeaac187a9c4d7e855def0b5c366725e8b5e2d4fce /tagstats
parent2bc6d4d03b91b0f019f96f37512534074f33b660 (diff)
downloadtaginfo-5167802ba9e30f17ea71e0783c475764fdb8ec64.tar
taginfo-5167802ba9e30f17ea71e0783c475764fdb8ec64.tar.gz
Use sqlite db file instead of text *.lst files for selected data.
This changes the way some data is passed from one taginfo update run to the next: instead of plain text files, an sqlite database is used. Part of this commit is a new version of the upstream sqlite.hpp file. After this commit, the update has to be run twice to get all data updated properly.
Diffstat (limited to 'tagstats')
-rw-r--r--tagstats/Makefile2
-rw-r--r--tagstats/osmstats.cpp13
-rw-r--r--tagstats/sqlite.hpp128
-rw-r--r--tagstats/tagstats.cpp38
-rw-r--r--tagstats/tagstats_handler.hpp48
5 files changed, 125 insertions, 104 deletions
diff --git a/tagstats/Makefile b/tagstats/Makefile
index 763c149..3a64d8b 100644
--- a/tagstats/Makefile
+++ b/tagstats/Makefile
@@ -49,7 +49,7 @@ all: tagstats osmstats
osmstats: osmstats.cpp statistics_handler.hpp
$(CXX) $(CXXFLAGS) $(CXXFLAGS_WARNINGS) -o $@ $< $(LDFLAGS) $(LIB_EXPAT) $(LIB_PBF) $(LIB_SQLITE)
-tagstats: tagstats.cpp tagstats_handler.hpp statistics_handler.hpp string_store.hpp geodistribution.hpp
+tagstats: tagstats.cpp tagstats_handler.hpp statistics_handler.hpp string_store.hpp geodistribution.hpp sqlite.hpp
$(CXX) $(CXXFLAGS) $(CXXFLAGS_WARNINGS) $(CXXFLAGS_FEATURES) -o $@ $< $(LDFLAGS) $(LIB_EXPAT) $(LIB_PBF) $(LIB_SQLITE) $(LIB_GD)
check:
diff --git a/tagstats/osmstats.cpp b/tagstats/osmstats.cpp
index bfd757d..cb4cc93 100644
--- a/tagstats/osmstats.cpp
+++ b/tagstats/osmstats.cpp
@@ -37,17 +37,8 @@ int main(int argc, char *argv[]) {
Osmium::OSMFile infile(argv[1]);
- Sqlite::Database db(argv[2]);
- sqlite3* sqlite_db = db.get_sqlite3();
- if (SQLITE_OK != sqlite3_exec(sqlite_db, \
- "CREATE TABLE stats (" \
- " key TEXT, " \
- " value INT64 " \
- ");", 0, 0, 0)) {
- std::cerr << "Database error: " << sqlite3_errmsg(sqlite_db) << "\n";
- sqlite3_close(sqlite_db);
- exit(1);
- }
+ Sqlite::Database db(argv[2], SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE);
+ db.exec("CREATE TABLE stats (key TEXT, value INT64);");
StatisticsHandler handler(db);
Osmium::Input::read(infile, handler);
diff --git a/tagstats/sqlite.hpp b/tagstats/sqlite.hpp
index ee68c92..61998b3 100644
--- a/tagstats/sqlite.hpp
+++ b/tagstats/sqlite.hpp
@@ -1,24 +1,13 @@
-#ifndef TAGSTATS_SQLITE_HPP
-#define TAGSTATS_SQLITE_HPP
+#ifndef SQLITE_HPP
+#define SQLITE_HPP
/*
- Copyright 2012 Jochen Topf <jochen@topf.org>.
+ Author: Jochen Topf <jochen@topf.org>
- This file is part of Tagstats.
+ https://github.com/joto/sqlite-cpp-wrapper
- Tagstats is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- Tagstats is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with Tagstats. If not, see <http://www.gnu.org/licenses/>.
+ This code is released into the Public Domain.
*/
@@ -30,13 +19,13 @@
#include <sqlite3.h>
/**
-* @brief The %Sqlite classes wrap the %Sqlite C library.
-*/
+ * @brief The %Sqlite classes wrap the %Sqlite C library.
+ */
namespace Sqlite {
/**
- * Exception returned by Sqlite wrapper classes when there are errors in the Sqlite3 lib
- */
+ * Exception returned by Sqlite wrapper classes when there are errors in the Sqlite3 lib
+ */
class Exception : public std::runtime_error {
public:
@@ -47,23 +36,18 @@ namespace Sqlite {
};
- class Statement;
-
/**
- * Wrapper class for Sqlite database
- */
+ * Wrapper class for Sqlite database
+ */
class Database {
- private:
-
- sqlite3* m_db;
-
public:
- Database(const char* filename) {
- if (sqlite3_open_v2(filename, &m_db, SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE, 0)) {
+ Database(const char* filename, const int flags) {
+ if (SQLITE_OK != sqlite3_open_v2(filename, &m_db, flags, 0)) {
+ std::string error = errmsg();
sqlite3_close(m_db);
- throw Sqlite::Exception("Can't open database", errmsg());
+ throw Sqlite::Exception("Can't open database", error);
}
}
@@ -71,36 +55,47 @@ namespace Sqlite {
sqlite3_close(m_db);
}
- const std::string& errmsg() const {
- static std::string error = std::string(sqlite3_errmsg(m_db));
- return error;
+ std::string errmsg() {
+ if (m_db) {
+ return std::string(sqlite3_errmsg(m_db));
+ } else {
+ return std::string("Database is not open");
+ }
}
sqlite3* get_sqlite3() {
return m_db;
}
- void begin_transaction() {
- if (SQLITE_OK != sqlite3_exec(m_db, "BEGIN TRANSACTION;", 0, 0, 0)) {
- std::cerr << "Database error: " << sqlite3_errmsg(m_db) << "\n";
+ void exec(const std::string& sql) {
+ if (SQLITE_OK != sqlite3_exec(m_db, sql.c_str(), 0, 0, 0)) {
+ std::string error = errmsg();
sqlite3_close(m_db);
- throw std::runtime_error("Sqlite error");
+ throw Sqlite::Exception("Database error", error);
}
}
+ void begin_transaction() {
+ exec("BEGIN TRANSACTION;");
+ }
+
void commit() {
- if (SQLITE_OK != sqlite3_exec(m_db, "COMMIT;", 0, 0, 0)) {
- std::cerr << "Database error: " << sqlite3_errmsg(m_db) << "\n";
- sqlite3_close(m_db);
- throw std::runtime_error("Sqlite error");
- }
+ exec("COMMIT;");
+ }
+
+ void rollback() {
+ exec("ROLLBACK;");
}
+ private:
+
+ sqlite3* m_db;
+
}; // class Database
/**
- * Wrapper class for Sqlite prepared statement.
- */
+ * Wrapper class for Sqlite prepared statement.
+ */
class Statement {
public:
@@ -140,28 +135,28 @@ namespace Sqlite {
return *this;
}
- Statement& bind_int(int value) {
+ Statement& bind_int(const int value) {
if (SQLITE_OK != sqlite3_bind_int(m_statement, m_bindnum++, value)) {
throw Sqlite::Exception("Can't bind int value", m_db.errmsg());
}
return *this;
}
- Statement& bind_int64(int64_t value) {
+ Statement& bind_int64(const int64_t value) {
if (SQLITE_OK != sqlite3_bind_int64(m_statement, m_bindnum++, value)) {
throw Sqlite::Exception("Can't bind int64 value", m_db.errmsg());
}
return *this;
}
- Statement& bind_double(double value) {
+ Statement& bind_double(const double value) {
if (SQLITE_OK != sqlite3_bind_double(m_statement, m_bindnum++, value)) {
throw Sqlite::Exception("Can't bind double value", m_db.errmsg());
}
return *this;
}
- Statement& bind_blob(const void* value, int length) {
+ Statement& bind_blob(const void* value, const int length) {
if (SQLITE_OK != sqlite3_bind_blob(m_statement, m_bindnum++, value, length, 0)) {
throw Sqlite::Exception("Can't bind blob value", m_db.errmsg());
}
@@ -176,6 +171,39 @@ namespace Sqlite {
m_bindnum = 1;
}
+ bool read() {
+ switch (sqlite3_step(m_statement)) {
+ case SQLITE_ROW:
+ return true;
+ case SQLITE_DONE:
+ return false;
+ default:
+ throw Sqlite::Exception("Sqlite error", m_db.errmsg());
+ }
+ }
+
+ int column_count() {
+ return sqlite3_column_count(m_statement);
+ }
+
+ std::string get_text(int column) {
+ if (column >= column_count()) {
+ throw Sqlite::Exception("Column larger than max columns", "");
+ }
+ const char* textptr = reinterpret_cast<const char*>(sqlite3_column_text(m_statement, column));
+ if (!textptr) {
+ throw Sqlite::Exception("Error reading text column", m_db.errmsg());
+ }
+ return std::string(textptr);
+ }
+
+ int get_int(int column) {
+ if (column >= column_count()) {
+ throw Sqlite::Exception("Column larger than max columns", m_db.errmsg());
+ }
+ return sqlite3_column_int(m_statement, column);
+ }
+
private:
Database& m_db;
@@ -186,4 +214,4 @@ namespace Sqlite {
} // namespace Sqlite
-#endif // TAGSTATS_SQLITE_HPP
+#endif // SQLITE_HPP
diff --git a/tagstats/tagstats.cpp b/tagstats/tagstats.cpp
index 35a3c55..f0a7a0a 100644
--- a/tagstats/tagstats.cpp
+++ b/tagstats/tagstats.cpp
@@ -57,15 +57,13 @@ void print_help() {
<< "This program is part of Taginfo. It calculates statistics\n" \
<< "on OSM tags from OSMFILE and puts them into DATABASE (an SQLite database).\n" \
<< "\nOptions:\n" \
- << " -H, --help This help message\n";
+ << " -H, --help This help message\n" \
+ << " -s, --selection-db=DATABASE Name of selection database\n";
#ifdef TAGSTATS_COUNT_TAG_COMBINATIONS
- std::cout << " -T, --tags=FILENAME File with tags we are interested in\n" \
- << " -m, --min-tag-combination-count=N Tag combinations not appearing this often\n" \
+ std::cout << " -m, --min-tag-combination-count=N Tag combinations not appearing this often\n" \
<< " are not written to database\n";
#endif // TAGSTATS_COUNT_TAG_COMBINATIONS
- std::cout << " -M, --map-tags=FILENAME File with tags we want maps for\n" \
- << " -R, --relation-types=FILENAME File with relation types we are interested in\n" \
- << " -t, --top=NUMBER Top of bounding box for distribution images\n" \
+ std::cout << " -t, --top=NUMBER Top of bounding box for distribution images\n" \
<< " -r, --right=NUMBER Right of bounding box for distribution images\n" \
<< " -b, --bottom=NUMBER Bottom of bounding box for distribution images\n" \
<< " -l, --left=NUMBER Left of bounding box for distribution images\n" \
@@ -78,11 +76,9 @@ int main(int argc, char *argv[]) {
static struct option long_options[] = {
{"help", no_argument, 0, 'H'},
#ifdef TAGSTATS_COUNT_TAG_COMBINATIONS
- {"tags", required_argument, 0, 'T'},
{"min-tag-combination-count", required_argument, 0, 'm'},
#endif // TAGSTATS_COUNT_TAG_COMBINATIONS
- {"map-tags", required_argument, 0, 'M'},
- {"relation-types", required_argument, 0, 'R'},
+ {"selection-db", required_argument, 0, 's'},
{"top", required_argument, 0, 't'},
{"right", required_argument, 0, 'r'},
{"bottom", required_argument, 0, 'b'},
@@ -92,9 +88,7 @@ int main(int argc, char *argv[]) {
{0, 0, 0, 0}
};
- std::string tags_list;
- std::string map_tags_list;
- std::string relation_type_list;
+ std::string selection_database_name;
double top = 90;
double right = 180;
@@ -109,9 +103,9 @@ int main(int argc, char *argv[]) {
while (true) {
int c = getopt_long(argc, argv,
#ifdef TAGSTATS_COUNT_TAG_COMBINATIONS
- "dHR:t:r:b:l:w:h:M:T:m:",
+ "dHt:r:b:l:w:h:s:m:",
#else
- "dHR:t:r:b:l:w:h:M:",
+ "dHt:r:b:l:w:h:s:",
#endif // TAGSTATS_COUNT_TAG_COMBINATIONS
long_options, 0);
if (c == -1) {
@@ -122,20 +116,14 @@ int main(int argc, char *argv[]) {
case 'H':
print_help();
exit(0);
-#ifdef TAGSTATS_COUNT_TAG_COMBINATIONS
- case 'T':
- tags_list = optarg;
+ case 's':
+ selection_database_name = optarg;
break;
+#ifdef TAGSTATS_COUNT_TAG_COMBINATIONS
case 'm':
min_tag_combination_count = atoi(optarg);
break;
#endif // TAGSTATS_COUNT_TAG_COMBINATIONS
- case 'M':
- map_tags_list = optarg;
- break;
- case 'R':
- relation_type_list = optarg;
- break;
case 't':
top = atof(optarg);
break;
@@ -166,9 +154,9 @@ int main(int argc, char *argv[]) {
GeoDistribution::set_dimensions(width, height);
Osmium::OSMFile infile(argv[optind]);
- Sqlite::Database db(argv[optind+1]);
+ Sqlite::Database db(argv[optind+1], SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE);
MapToInt<rough_position_t> map_to_int(left, bottom, right, top, width, height);
- TagStatsHandler handler(db, tags_list, map_tags_list, relation_type_list, map_to_int, min_tag_combination_count);
+ TagStatsHandler handler(db, selection_database_name, map_to_int, min_tag_combination_count);
Osmium::Input::read(infile, handler);
google::protobuf::ShutdownProtobufLibrary();
diff --git a/tagstats/tagstats_handler.hpp b/tagstats/tagstats_handler.hpp
index a5a9feb..a62218a 100644
--- a/tagstats/tagstats_handler.hpp
+++ b/tagstats/tagstats_handler.hpp
@@ -501,7 +501,7 @@ class TagStatsHandler : public Osmium::Handler::Base {
public:
- TagStatsHandler(Sqlite::Database& database, const std::string& tags_list, const std::string& map_tags_list, const std::string& relation_type_list, MapToInt<rough_position_t>& map_to_int, unsigned int min_tag_combination_count) :
+ TagStatsHandler(Sqlite::Database& database, const std::string& selection_database_name, MapToInt<rough_position_t>& map_to_int, unsigned int min_tag_combination_count) :
Base(),
m_min_tag_combination_count(min_tag_combination_count),
m_max_timestamp(0),
@@ -513,25 +513,39 @@ public:
, m_storage()
#endif
{
- std::string key_value;
+ if (!selection_database_name.empty()) {
+ Sqlite::Database sdb(selection_database_name.c_str(), SQLITE_OPEN_READONLY);
#ifdef TAGSTATS_COUNT_TAG_COMBINATIONS
- std::ifstream tags_list_file(tags_list.c_str(), std::ifstream::in);
- while (tags_list_file >> key_value) {
- m_key_value_stats[m_string_store.add(key_value.c_str())] = new KeyValueStats();
- }
+ {
+ Sqlite::Statement select(sdb, "SELECT key FROM interesting_tags WHERE value IS NULL;");
+ while (select.read()) {
+ std::string key_value = select.get_text(0);
+ m_key_value_stats[m_string_store.add(key_value.c_str())] = new KeyValueStats();
+ }
+ }
+ {
+ Sqlite::Statement select(sdb, "SELECT key || '=' || value FROM interesting_tags WHERE value IS NOT NULL;");
+ while (select.read()) {
+ std::string key_value = select.get_text(0);
+ m_key_value_stats[m_string_store.add(key_value.c_str())] = new KeyValueStats();
+ }
+ }
#endif // TAGSTATS_COUNT_TAG_COMBINATIONS
-
- std::ifstream map_tags_list_file(map_tags_list.c_str(), std::ifstream::in);
- while (std::getline(map_tags_list_file, key_value)) {
- m_key_value_geodistribution[m_string_store.add(key_value.c_str())] = new GeoDistribution();
- key_value.clear();
- }
-
- std::ifstream relation_type_list_file(relation_type_list.c_str(), std::ifstream::in);
- std::string type;
- while (relation_type_list_file >> type) {
- m_relation_type_stats[type] = RelationTypeStats();
+ {
+ Sqlite::Statement select(sdb, "SELECT key || '=' || value FROM frequent_tags;");
+ while (select.read()) {
+ std::string key_value = select.get_text(0);
+ m_key_value_geodistribution[m_string_store.add(key_value.c_str())] = new GeoDistribution();
+ }
+ }
+ {
+ Sqlite::Statement select(sdb, "SELECT rtype FROM interesting_relation_types;");
+ while (select.read()) {
+ std::string rtype = select.get_text(0);
+ m_relation_type_stats[rtype] = RelationTypeStats();
+ }
+ }
}
}