From 29cf83f2a8a3a137867b5ed523b75be192aa92ad Mon Sep 17 00:00:00 2001 From: Jochen Topf Date: Thu, 18 Sep 2014 10:31:47 +0200 Subject: Better error reporting when parsing project JSON files. --- sources/projects/parse.rb | 232 +++++++++++++++++++++++++++++++--------------- 1 file changed, 157 insertions(+), 75 deletions(-) (limited to 'sources/projects') diff --git a/sources/projects/parse.rb b/sources/projects/parse.rb index 7a0c270..40bd288 100755 --- a/sources/projects/parse.rb +++ b/sources/projects/parse.rb @@ -36,101 +36,183 @@ db = SQLite3::Database.new(dir + '/taginfo-projects.db') #------------------------------------------------------------------------------ -projects = db.execute("SELECT id, fetch_json FROM projects WHERE status='OK' ORDER BY id") +class Log -projects.each do |id, json| - puts " #{id}..." - error_log = '' - begin - data = JSON.parse(json, { :symbolize_names => true, :create_additions => false }) + def initialize + @messages = [] + @state = 0 + end - db.transaction do |db| - db.execute("UPDATE projects SET data_format=?, data_url=? WHERE id=?", data[:data_format], data[:data_url], id) + def fatal(message) + @messages << "FATAL: #{message}" + if @state < 3 + @state = 3 + end + end - if data[:data_updated] - db.execute("UPDATE projects SET data_updated=? WHERE id=?", data[:data_updated], id) - end + def error(message) + @messages << "ERROR: #{message}" + if @state < 2 + @state = 2 + end + end - if data[:project] - p = data[:project].clone + def warning(message) + @messages << "WARNING: #{message}" + if @state < 1 + @state = 1 + end + end - if ! p[:name] - error_log += "ERROR: MISSING project.name\n" - end + def get_log + return @messages.join("\n") + end - if ! p[:description] - error_log += "ERROR: MISSING project.description\n" - end + def get_state + if @state < 3 + return 'OK' + else + return 'PARSE_ERROR' + end + end - if ! p[:project_url] - error_log += "ERROR: MISSING project.project_url\n" - end +end - if ! p[:contact_name] - error_log += "ERROR: MISSING project.contact_name\n" - end +#------------------------------------------------------------------------------ - if ! p[:contact_email] - error_log += "ERROR: MISSING project.contact_email\n" - end +def parse_and_check(id, data, log, db) + if data[:data_format] != 1 + log.fatal "UNKNOWN OR MISSING data_format (KNOWN FORMATS: 1)." + return + end - db.execute("UPDATE projects SET name=?, description=?, project_url=?, doc_url=?, icon_url=?, contact_name=?, contact_email=? WHERE id=?", - p[:name], - p[:description], - p[:project_url], - p[:doc_url], - p[:icon_url], - p[:contact_name], - p[:contact_email], - id - ) - - p.delete(:name) - p.delete(:description) - p.delete(:project_url) - p.delete(:doc_url) - p.delete(:icon_url) - p.delete(:contact_name) - p.delete(:contact_email) - - p.each_key do |key| - error_log += "WARNING: project HAS UNKNOWN KEY '#{key}'\n" - end - else - error_log += "ERROR: MISSING project\n" - end + db.execute("UPDATE projects SET data_format=?, data_url=? WHERE id=?", data[:data_format], data[:data_url], id) + + if data[:data_updated] + if data[:data_updated].match(/^[0-9]{8}T[0-9]{6}Z$/) + db.execute("UPDATE projects SET data_updated=? WHERE id=?", data[:data_updated], id) + else + log.error "project.data_updated MUST USE FORMAT 'yyyymmddThhmmssZ'. CURRENT VALUE IGNORED." + end + end + + data.each_key do |property| + unless property.match(/^(data_format|data_updated|data_url|project|tags)$/) + log.warning "UNKNOWN PROPERTY: '#{property}'." + end + end - if data[:tags] - data[:tags].each do |d| - on = { 'node' => 0, 'way' => 0, 'relation' => 0, 'area' => 0 } - if d[:object_types] && d[:object_types].class == Array + unless data[:project] + log.fatal "MISSING project." + return + end + + p = data[:project].clone + + if ! p[:name] + log.fatal "MISSING project.name." + end + + if ! p[:description] + log.fatal "MISSING project.description." + end + + if ! p[:project_url] + log.fatal "MISSING project.project_url." + end + + if ! p[:contact_name] + log.error "MISSING project.contact_name." + end + + if ! p[:contact_email] + log.error "MISSING project.contact_email." + end + + db.execute("UPDATE projects SET name=?, description=?, project_url=?, doc_url=?, icon_url=?, contact_name=?, contact_email=? WHERE id=?", + p[:name], + p[:description], + p[:project_url], + p[:doc_url], + p[:icon_url], + p[:contact_name], + p[:contact_email], + id + ) + + p.delete(:name) + p.delete(:description) + p.delete(:project_url) + p.delete(:doc_url) + p.delete(:icon_url) + p.delete(:contact_name) + p.delete(:contact_email) + p.delete(:keywords) # ignored for future extensions + + p.each_key do |property| + log.warning "project HAS UNKNOWN PROPERTY: '#{property}'." + end + + unless data[:tags] + log.fatal "MISSING tags." + return + end + + data[:tags].each_with_index do |d, n| + if d[:key].nil? + log.error "MISSING tags.#{n}.key.\n" + else + on = { 'node' => 0, 'way' => 0, 'relation' => 0, 'area' => 0 } + if d[:object_types] + if d[:object_types].class == Array + if d[:object_types] == [] + log.warning "EMPTY tags.#{n}.object_types IS INTERPRETED AS 'ALL TYPES'. PLEASE REMOVE object_types OR ADD SOME TYPES." + on = { 'node' => 1, 'way' => 1, 'relation' => 1, 'area' => 1 } + else d[:object_types].each do |type| - on[type] = 1 + if type.match(/^(node|way|relation|area)$/) + on[type] = 1 + else + log.error "UNKNOWN OBJECT TYPE FOR #{d[:key]}: '#{type}' (ALLOWED ARE: node, way, relation, area)." + end end - else - on = { 'node' => 1, 'way' => 1, 'relation' => 1, 'area' => 1 } end - db.execute("INSERT INTO project_tags (project_id, key, value, description, doc_url, icon_url, on_node, on_way, on_relation, on_area) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", - id, - d[:key], - d[:value], - d[:description], - d[:doc_url], - d[:icon_url], - on['node'], - on['way'], - on['relation'], - on['area'], - ); + else + log.error "tags.#{n}.object_types (FOR KEY '#{d[:key]}') MUST BE AN ARRAY." end else - error_log += "ERROR: MISSING tags\n" + on = { 'node' => 1, 'way' => 1, 'relation' => 1, 'area' => 1 } end + db.execute("INSERT INTO project_tags (project_id, key, value, description, doc_url, icon_url, on_node, on_way, on_relation, on_area) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", + id, + d[:key], + d[:value], + d[:description], + d[:doc_url], + d[:icon_url], + on['node'], + on['way'], + on['relation'], + on['area'], + ); + end + end +end - db.execute("UPDATE projects SET error_log=? WHERE id=?", error_log, id) +#------------------------------------------------------------------------------ + +db.execute("SELECT id, fetch_json FROM projects WHERE status='OK' ORDER BY id").each do |id, json| + puts " #{id}..." + begin + data = JSON.parse(json, { :symbolize_names => true, :create_additions => false }) + + db.transaction do |db| + log = Log.new + parse_and_check(id, data, log, db) + db.execute("UPDATE projects SET error_log=?, status=? WHERE id=?", log.get_log(), log.get_state(), id) end rescue JSON::ParserError - db.execute("UPDATE projects SET status='PARSE_ERROR' WHERE id=?", id) + db.execute("UPDATE projects SET status='JSON_ERROR' WHERE id=?", id) end end -- cgit v1.2.3