diff options
author | Jochen Topf <jochen@topf.org> | 2013-01-10 11:00:09 +0100 |
---|---|---|
committer | Jochen Topf <jochen@topf.org> | 2013-01-10 11:00:13 +0100 |
commit | 04f03cb543e0af844bc115b517a355a2de6abe47 (patch) | |
tree | 31c4247257bb2eafd2f2949a7943cf1948a1f044 /sources/languages | |
parent | 2d44735e2e2ae4514c7e2b70f516c8f1bbc0df0e (diff) | |
download | taginfo-04f03cb543e0af844bc115b517a355a2de6abe47.tar taginfo-04f03cb543e0af844bc115b517a355a2de6abe47.tar.gz |
Add new source: Languages
Get IANA language subtag registry. Not yet used.
Diffstat (limited to 'sources/languages')
-rwxr-xr-x | sources/languages/import_subtag_registry.rb | 131 | ||||
-rw-r--r-- | sources/languages/post.sql | 14 | ||||
-rw-r--r-- | sources/languages/pre.sql | 22 | ||||
-rwxr-xr-x | sources/languages/update.sh | 43 |
4 files changed, 210 insertions, 0 deletions
diff --git a/sources/languages/import_subtag_registry.rb b/sources/languages/import_subtag_registry.rb
new file mode 100755
index 0000000..636181e
--- /dev/null
+++ b/sources/languages/import_subtag_registry.rb
@@ -0,0 +1,131 @@
+#!/usr/bin/ruby
+#------------------------------------------------------------------------------
+#
+# Taginfo source: Languages
+#
+# import_subtag_registry.rb
+#
+#------------------------------------------------------------------------------
+#
+# Copyright (C) 2013 Jochen Topf <jochen@remote.org>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+#------------------------------------------------------------------------------
+
+require 'rubygems'
+
+require 'sqlite3'
+
+# One record of the registry. Every new instance adds itself to a class-wide
+# list; the Description and Prefix fields may occur several times per record.
+class Subtag
+
+    @@entries = []
+
+    attr_accessor :type, :subtag, :added, :suppress_script, :scope
+
+    def self.entries
+        @@entries
+    end
+
+    def initialize
+        @@entries.push(self)
+        @descriptions = []
+        @prefixes = []
+    end
+
+    def description=(value)
+        @descriptions.push(value)
+    end
+
+    def description
+        @descriptions.join('. ')
+    end
+
+    def prefix=(value)
+        @prefixes.push(value)
+    end
+
+    def prefix
+        @prefixes.join(',')
+    end
+
+end
+
+dir = ARGV[0] || '.'
+
+db = SQLite3::Database.new(dir + '/taginfo-languages.db')
+
+registry_file = "#{dir}/language-subtag-registry"
+
+# Date from the File-Date line at the top of the registry, if there is one.
+file_date = nil
+
+# Parse the registry: '%%' starts a new record, 'Key: value' lines are its
+# fields, and lines starting with whitespace continue the previous field.
+entry = nil
+field = nil     # [setter name, value] of the field currently being read
+
+# Hand the finished field to the current record if it has such an attribute.
+store = lambda do
+    entry.send(*field) if entry && field && entry.respond_to?(field[0])
+    field = nil
+end
+
+File.open(registry_file) do |file|
+    file.each_line do |line|
+        line.chomp!
+        if line == '%%'
+            store.call
+            entry = Subtag.new
+        elsif entry.nil? && line =~ /^File-Date: ([0-9]{4}-[0-9]{2}-[0-9]{2})$/
+            file_date = $1
+        elsif line =~ /^\s+(.*)/
+            # A folded line belongs to the value read so far, it is not a new value.
+            field[1] << ' ' << $1 if field
+        else
+            store.call
+            # Limit 2 keeps values intact that themselves contain ': '.
+            (key, value) = line.split(/: /, 2)
+            field = [(key.downcase.gsub(/[^a-z]/, '_') + '=').to_sym, value] if value
+        end
+    end
+end
+store.call
+
+SUBTAG_TYPES = %w( language script region variant ).freeze
+
+# The block form commits at the end and rolls back if an exception is raised.
+db.transaction do
+    db.execute("UPDATE source SET data_until=?", ["#{file_date} 00:00:00"]) if file_date
+
+    # Skip private use entries, special and collection languages, scripts
+    # starting with 'Z' and regions that are not two capital letters.
+    Subtag.entries.each do |entry|
+        if SUBTAG_TYPES.include?(entry.type) &&
+            entry.description != 'Private use' &&
+            (entry.type != 'language' || (entry.scope != 'special' && entry.scope != 'collection')) &&
+            (entry.type != 'script' || !entry.subtag.match(%r{^Z}) ) &&
+            (entry.type != 'region' || entry.subtag.match(%r{^[A-Z]{2}$}) )
+            db.execute("INSERT INTO subtags (stype, subtag, added, suppress_script, scope, description, prefix) VALUES (?, ?, ?, ?, ?, ?, ?)",
+                [entry.type, entry.subtag, entry.added, entry.suppress_script,
+                 entry.scope, entry.description, entry.prefix])
+        end
+    end
+end
+
+
+#-- THE END -------------------------------------------------------------------
diff --git a/sources/languages/post.sql b/sources/languages/post.sql
new file mode 100644
index 0000000..909bb9c
--- /dev/null
+++ b/sources/languages/post.sql
@@ -0,0 +1,14 @@
+--
+-- Taginfo source: Languages
+--
+-- post.sql
+--
+
+.bail ON
+
+
+
+ANALYZE;
+
+UPDATE source SET update_end=datetime('now');
+
diff --git a/sources/languages/pre.sql b/sources/languages/pre.sql
new file mode 100644
index 0000000..c1a505c
--- /dev/null
+++ b/sources/languages/pre.sql
@@ -0,0 +1,22 @@
+--
+-- Taginfo source: Languages
+--
+-- pre.sql
+--
+
+.bail ON
+
+INSERT INTO source (id, name, update_start) SELECT 'languages', 'Languages', datetime('now');
+
+DROP TABLE IF EXISTS subtags;
+
+CREATE TABLE subtags (
+    stype TEXT,
+    subtag TEXT,
+    added TEXT,
+    suppress_script TEXT,
+    scope TEXT,
+    description TEXT,
+    prefix TEXT
+);
+
diff --git a/sources/languages/update.sh b/sources/languages/update.sh
new file mode 100755
index 0000000..26a374a
--- /dev/null
+++ b/sources/languages/update.sh
@@ -0,0 +1,43 @@
+#!/bin/sh
+#
+# Taginfo source: Languages
+#
+# update.sh DIR
+#
+
+set -e
+
+DIR=$1
+REGISTRY_URL="https://www.iana.org/assignments/language-subtag-registry"
+REGISTRY_FILE="$DIR/language-subtag-registry"
+
+DATECMD='date +%Y-%m-%dT%H:%M:%S'
+
+if [ "x" = "x$DIR" ]; then
+    echo "Usage: update.sh DIR"
+    exit 1
+fi
+
+echo "`$DATECMD` Start languages..."
+
+DATABASE="$DIR/taginfo-languages.db"
+
+rm -f "$DATABASE"
+
+echo "`$DATECMD` Running init.sql..."
+sqlite3 "$DATABASE" <../init.sql
+
+echo "`$DATECMD` Running pre.sql..."
+sqlite3 "$DATABASE" <pre.sql
+
+echo "`$DATECMD` Getting subtag registry..."
+# --fail aborts on HTTP errors instead of saving the error page as the registry.
+curl --silent --fail --location --time-cond "$REGISTRY_FILE" --output "$REGISTRY_FILE" "$REGISTRY_URL"
+
+echo "`$DATECMD` Running import..."
+./import_subtag_registry.rb "$DIR"
+
+echo "`$DATECMD` Running post.sql..."
+sqlite3 "$DATABASE" <post.sql
+
+echo "`$DATECMD` Done languages."