From 5b2e986a24e4d51c3103b257516cff758d087cbf Mon Sep 17 00:00:00 2001 From: Jochen Topf Date: Tue, 9 Nov 2010 17:48:13 +0100 Subject: Store error messages from wiki import in database. --- sources/wiki/get_wiki_data.rb | 21 ++++++++++++--------- sources/wiki/pre.sql | 7 +++++++ 2 files changed, 19 insertions(+), 9 deletions(-) (limited to 'sources/wiki') diff --git a/sources/wiki/get_wiki_data.rb b/sources/wiki/get_wiki_data.rb index f4632ca..58544fe 100755 --- a/sources/wiki/get_wiki_data.rb +++ b/sources/wiki/get_wiki_data.rb @@ -97,13 +97,14 @@ class WikiPage end # Has this wiki page a name that we can understand and process? - def valid? - return false if @lang !~ /^[a-z]{2}(-[a-z0-9-]+)?$/ - return false if @ttype == 'key' && ! @value.nil? - return false if @ttype == 'tag' && @value.nil? - return false if @key =~ %r{/} - return false if @value =~ %r{/} - return true + def check_title + return :wrong_lang_format if @lang !~ /^[a-z]{2}(-[a-z0-9-]+)?$/ + return :lang_en if @title =~ /^en:/i + return :value_for_key if @ttype == 'key' && ! @value.nil? + return :no_value_for_tag if @ttype == 'tag' && @value.nil? + return :slash_in_key if @key =~ %r{/} + return :slash_in_value if @value =~ %r{/} + return :ok end # Return parameters for API call to read this page. @@ -239,7 +240,8 @@ File.open(dir + '/tagpages.list') do |wikipages| page = WikiPage.new(t[0], t[1], t[2]) puts "page: (#{page.title}) (#{page.type}) (#{page.namespace}) (#{page.tag})" - if page.valid? 
+ reason = page.check_title + if reason == :ok res = api.get(page.params) page.content = res.body @@ -301,7 +303,8 @@ File.open(dir + '/tagpages.list') do |wikipages| end page.insert(db) else - puts "invalid page: #{page.title}" + puts "invalid page: #{reason} #{page.title}" + db.execute('INSERT INTO invalid_page_titles (reason, title) VALUES (?, ?)', reason, page.title) end end end diff --git a/sources/wiki/pre.sql b/sources/wiki/pre.sql index 1f51754..0e8200e 100644 --- a/sources/wiki/pre.sql +++ b/sources/wiki/pre.sql @@ -74,6 +74,13 @@ CREATE TABLE wiki_languages ( count_pages INTEGER ); +DROP TABLE IF EXISTS invalid_page_titles; + +CREATE TABLE invalid_page_titles ( + reason TEXT, + title TEXT +); + DROP TABLE IF EXISTS stats; CREATE TABLE stats ( -- cgit v1.2.3